# Subtitle support for CeskaTelevizeIE (youtube_dl/extractor/ceskatelevize.py).
#
# Both definitions below belong in the body of that class; indent them one
# level when placing them there.  In _real_extract the wiring is:
#
#     subs = item.get('subtitles')
#     if subs:
#         subtitles = self.extract_subtitles(episode_id, subs)
#
# and the result is returned under the 'subtitles' key of the info dict.
# NOTE(review): extract_subtitles is presumably the base-class entry point
# that ends up calling _get_subtitles -- it is not visible here, confirm.


def _get_subtitles(self, episode_id, subs):
    """ Download the first subtitle track and return it converted to SRT

    episode_id -- video id, forwarded to _download_webpage
    subs -- sequence of subtitle descriptors; only subs[0]['url'] is read

    Returns {'cs': [{'ext': 'srt', 'data': <SRT text>}]}, or an empty dict
    when no usable subtitle URL is present.
    """
    try:
        subtitles_url = subs[0]['url']
    except (IndexError, KeyError, TypeError):
        # Empty or malformed metadata: report "no subtitles" rather than
        # aborting the extraction of the whole video.
        return {}
    if not subtitles_url:
        return {}

    original_subtitles = self._download_webpage(
        subtitles_url, episode_id, 'Downloading subtitles')
    return {
        'cs': [{
            'ext': 'srt',
            'data': self._fix_subtitles(original_subtitles),
        }]
    }


@staticmethod
def _fix_subtitles(subtitles):
    """ Convert millisecond-based subtitles to SRT

    A cue header line of the form "<index>; <start_ms> <stop_ms>" is
    replaced by two lines: the index, then
    "HH:MM:SS,mmm --> HH:MM:SS,mmm".  Every other line is passed through
    unchanged.  The resulting lines are joined with CRLF.
    """
    # Compiled once up front instead of being looked up for every line.
    cue_header_re = re.compile(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$")

    def _msectotimecode(msec):
        """ Helper utility to convert milliseconds to timecode """
        components = []
        # Peel off milliseconds, seconds, minutes and finally hours; the
        # last divider keeps the hour field within two digits.
        for divider in [1000, 60, 60, 100]:
            components.append(msec % divider)
            msec //= divider
        return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)

    def _fix_subtitle(subtitle):
        for line in subtitle.splitlines():
            m = cue_header_re.match(line)
            if not m:
                # Cue text, blank separators, anything unrecognised.
                yield line
                continue
            index, start_ms, stop_ms = m.groups()
            yield index
            yield "{0} --> {1}".format(
                _msectotimecode(int(start_ms)),
                _msectotimecode(int(stop_ms)))

    return "\r\n".join(_fix_subtitle(subtitles))