X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fnrk.py;h=bff36f9d3f24cad293144d8e216faf5eeefed92c;hb=674fb0fcc54c72448f80a0573f7fd116f220827e;hp=e950c76dd2f230e86ea94f974dd612ec0478f42e;hpb=799d88d3d8a9337fb0b7a858ec4ffb3aaacbc974;p=youtube-dl diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index e950c76dd..bff36f9d3 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, float_or_none, + parse_duration, unified_strdate, ) @@ -72,7 +74,7 @@ class NRKIE(InfoExtractor): class NRKTVIE(InfoExtractor): - _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' + _VALID_URL = r'(?Phttp://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' _TESTS = [ { @@ -111,9 +113,6 @@ class NRKTVIE(InfoExtractor): 'upload_date': '20150106', }, 'skip': 'Only works from Norway', - 'params': { - 'proxy': '127.0.0.1:8118', - }, }, { 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', @@ -147,12 +146,33 @@ class NRKTVIE(InfoExtractor): 'duration': 6947.5199999999995, }, 'skip': 'Only works from Norway', - 'params': { - 'proxy': '127.0.0.1:8118', - }, } ] + def _debug_print(self, txt): + if self._downloader.params.get('verbose', False): + self.to_screen('[debug] %s' % txt) + + def _get_subtitles(self, subtitlesurl, video_id, baseurl): + url = "%s%s" % (baseurl, subtitlesurl) + self._debug_print('%s: Subtitle url: %s' % (video_id, url)) + captions = self._download_xml( + url, video_id, 'Downloading subtitles', + transform_source=lambda s: s.replace(r'
', '\r\n')) + lang = captions.get('lang', 'no') + ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) + srt = '' + for pos, p in enumerate(ps): + begin = parse_duration(p.get('begin')) + duration = parse_duration(p.get('dur')) + starttime = self._subtitles_timecode(begin) + endtime = self._subtitles_timecode(begin + duration) + srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text) + return {lang: [ + {'ext': 'ttml', 'url': url}, + {'ext': 'srt', 'data': srt}, + ]} + def _extract_f4m(self, manifest_url, video_id): return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) @@ -160,6 +180,7 @@ class NRKTVIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') part_id = mobj.group('part_id') + baseurl = mobj.group('baseurl') webpage = self._download_webpage(url, video_id) @@ -216,9 +237,15 @@ class NRKTVIE(InfoExtractor): m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage) if m3u8_url: formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4')) - self._sort_formats(formats) + subtitles_url = self._html_search_regex( + r'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', + webpage, 'subtitle URL', default=None) + subtitles = None + if subtitles_url: + subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl) + return { 'id': video_id, 'title': title, @@ -227,4 +254,5 @@ class NRKTVIE(InfoExtractor): 'upload_date': upload_date, 'duration': duration, 'formats': formats, + 'subtitles': subtitles, }