From: Yen Chi Hsuan Date: Sun, 21 Jun 2015 11:22:26 +0000 (+0800) Subject: [bbccouk] Remove TTML to srt conversion codes X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=78294e6a9ce2c9a294d663ac79936df7353b9980;p=youtube-dl [bbccouk] Remove TTML to srt conversion codes It's broken. See #6038 --- diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 0305f88b5..5825d2867 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -251,26 +251,11 @@ class BBCCoUkIE(InfoExtractor): for connection in self._extract_connections(media): captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') - ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}')) - srt = '' - - def _extract_text(p): - if p.text is not None: - stripped_text = p.text.strip() - if stripped_text: - return stripped_text - return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span')) - for pos, p in enumerate(ps): - srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p)) subtitles[lang] = [ { 'url': connection.get('href'), 'ext': 'ttml', }, - { - 'data': srt, - 'ext': 'srt', - }, ] return subtitles