<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
- </tt>'''
+ </tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols
<p begin="0" end="1">The first line</p>
</div>
</body>
- </tt>'''
+ </tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
</div>
</body>
-</tt>'''
+</tt>'''.encode('utf-8')
srt_data = '''1
00:00:02,080 --> 00:00:05,839
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
'''
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
+ dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">Line 1</p>
+ <p begin="1" end="2">第二行</p>
+ </div>
+ </body>
+ </tt>'''.encode('utf-16')
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+Line 1
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
+
def test_cli_option(self):
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
dfxp_file = old_file
srt_file = subtitles_filename(filename, lang, 'srt')
- with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
+ with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
with io.open(srt_file, 'wt', encoding='utf-8') as f:
def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
LEGACY_NAMESPACES = (
- ('http://www.w3.org/ns/ttml', [
- 'http://www.w3.org/2004/11/ttaf1',
- 'http://www.w3.org/2006/04/ttaf1',
- 'http://www.w3.org/2006/10/ttaf1',
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
]),
- ('http://www.w3.org/ns/ttml#styling', [
- 'http://www.w3.org/ns/ttml#style',
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
]),
)
for ns in v:
dfxp_data = dfxp_data.replace(ns, k)
- dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
+ dfxp = compat_etree_fromstring(dfxp_data)
out = []
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')