[utils] dfxp2srt: make TTMLPElementParser inherit from object
[youtube-dl] / youtube_dl / utils.py
index 9c1c0e0bdf55040dc3b82d4393d50c3623e3b0de..4262ad6ac9cf62ad8fc48222d8d026dc38f80b7f 100644 (file)
@@ -1828,9 +1828,11 @@ def mimetype2ext(mt):
     _, _, res = mt.rpartition('/')
 
     return {
-        'x-ms-wmv': 'wmv',
-        'x-mp4-fragmented': 'mp4',
+        '3gpp': '3gp',
         'ttml+xml': 'ttml',
+        'x-flv': 'flv',
+        'x-mp4-fragmented': 'mp4',
+        'x-ms-wmv': 'wmv',
     }.get(res, res)
 
 
@@ -2015,20 +2017,27 @@ def dfxp2srt(dfxp_data):
         'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
     })
 
-    def parse_node(node):
-        str_or_empty = functools.partial(str_or_none, default='')
+    class TTMLPElementParser(object):
+        out = ''
 
-        out = str_or_empty(node.text)
+        def start(self, tag, attrib):
+            if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
+                self.out += '\n'
 
-        for child in node:
-            if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
-                out += '\n' + str_or_empty(child.tail)
-            elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
-                out += str_or_empty(parse_node(child))
-            else:
-                out += str_or_empty(xml.etree.ElementTree.tostring(child))
+        def end(self, tag):
+            pass
+
+        def data(self, data):
+            self.out += data
 
-        return out
+        def close(self):
+            return self.out.strip()
+
+    def parse_node(node):
+        target = TTMLPElementParser()
+        parser = xml.etree.ElementTree.XMLParser(target=target)
+        parser.feed(xml.etree.ElementTree.tostring(node))
+        return parser.close()
 
     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
     out = []