from ..compat import (
compat_urllib_request,
compat_urllib_parse,
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import (
if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
- req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
+ req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url)
playlist = self._download_json(req, video_id)
duration = float_or_none(item.get('duration'))
thumbnail = item.get('previewImageUrl')
+ subtitles = {}
+ subs = item.get('subtitles')
+ if subs:
+ subtitles = self.extract_subtitles(episode_id, subs)
+
return {
'id': episode_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
+ 'subtitles': subtitles,
}
+
def _get_subtitles(self, episode_id, subs):
    """Download the first subtitle track and return it converted to SRT.

    ``subs`` is expected to be a sequence of dicts, each carrying a
    ``'url'`` key; only the first entry is used.  The downloaded text is
    run through ``self._fix_subtitles`` and returned under the ``'cs'``
    language key as a single ``srt`` entry.

    Returns an empty dict when there is no usable subtitle URL, so that a
    missing or malformed subtitle entry does not abort the extraction.
    """
    first_track = subs[0] if subs else None
    subtitle_url = (
        first_track.get('url') if isinstance(first_track, dict) else None)
    if not subtitle_url:
        # Subtitles are optional metadata: report "none" instead of
        # raising IndexError/KeyError on an empty or incomplete entry.
        return {}

    original_subtitles = self._download_webpage(
        subtitle_url, episode_id, 'Downloading subtitles')
    return {
        'cs': [{
            'ext': 'srt',
            'data': self._fix_subtitles(original_subtitles),
        }]
    }
+
@staticmethod
def _fix_subtitles(subtitles):
    """Convert millisecond-based subtitles to SRT.

    Every cue header line of the form ``<index>; <start_ms> <stop_ms>``
    is replaced by two lines: the cue index, followed by an SRT timing
    line ``HH:MM:SS,mmm --> HH:MM:SS,mmm``.  All other lines are passed
    through unchanged.  The resulting lines are joined with CRLF.
    """
    # Compiled once up front instead of being looked up for every line.
    cue_header = re.compile(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$")

    def _msectotimecode(msec):
        """Format a millisecond count as an ``HH:MM:SS,mmm`` timecode."""
        seconds, millis = divmod(msec, 1000)
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        # Hours are not truncated: values of 100 h and above simply use
        # more digits rather than wrapping back to 00.
        return "{0:02}:{1:02}:{2:02},{3:03}".format(
            hours, minutes, seconds, millis)

    def _fix_subtitle(subtitle):
        """Yield the SRT lines for the given millisecond-based text."""
        for line in subtitle.splitlines():
            m = cue_header.match(line)
            if m is None:
                # Text lines and blank separators are kept as they are.
                yield line
                continue
            index, start, stop = m.groups()
            yield index
            yield "{0} --> {1}".format(
                _msectotimecode(int(start)), _msectotimecode(int(stop)))

    return "\r\n".join(_fix_subtitle(subtitles))