[utils] Support TTML without default namespace

author Yen Chi Hsuan <yan12125@gmail.com>

Mon, 18 May 2015 16:45:01 +0000 (00:45 +0800)

committer Yen Chi Hsuan <yan12125@gmail.com>

Mon, 18 May 2015 16:45:01 +0000 (00:45 +0800)
author Yen Chi Hsuan <yan12125@gmail.com>
Mon, 18 May 2015 16:45:01 +0000 (00:45 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Mon, 18 May 2015 16:45:01 +0000 (00:45 +0800)
diff --git a/test/test_utils.py b/test/test_utils.py

index b401070371bfcea183abc2b08419ddd0c75dd3fe..e13e11b59f7f427e8c082f003c650268895ef6f3 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -621,6 +621,21 @@ Line
  '''
          self.assertEqual(dfxp2srt(dfxp_data), srt_data)
  
+        dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+            <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">The first line</p>
+                </div>
+            </body>
+            </tt>'''
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The first line
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
+
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index ed9ed9ed63ec9b40d929f83cb2e56ee4d63f9e7f..507f073834eeb9e298c2a6f71e4082c8c4404a1c 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1848,9 +1848,9 @@ def dfxp2srt(dfxp_data):
          out = str_or_empty(node.text)
  
          for child in node:
-            if child.tag == _x('ttml:br'):
+            if child.tag in (_x('ttml:br'), 'br'):
                  out += '\n' + str_or_empty(child.tail)
-            elif child.tag == _x('ttml:span'):
+            elif child.tag in (_x('ttml:span'), 'span'):
                  out += str_or_empty(parse_node(child))
              else:
                  out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1859,7 +1859,10 @@ def dfxp2srt(dfxp_data):
  
      dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
      out = []
-    paras = dfxp.findall(_x('.//ttml:p'))
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+    if not paras:
+        raise ValueError('Invalid dfxp/TTML subtitle')
  
      for para, index in zip(paras, itertools.count(1)):
          begin_time = parse_dfxp_time_expr(para.attrib['begin'])
author	Yen Chi Hsuan <yan12125@gmail.com>
	Mon, 18 May 2015 16:45:01 +0000 (00:45 +0800)
committer	Yen Chi Hsuan <yan12125@gmail.com>
	Mon, 18 May 2015 16:45:01 +0000 (00:45 +0800)
test/test_utils.py		patch | blob | history
youtube_dl/utils.py		patch | blob | history