[extractor/common] Support HTML media elements without child nodes
[youtube-dl] / youtube_dl / extractor / common.py
index da192728f182dbe38c754337c83d5766112f123d..431cef831ab99fc726a0d6efc616d08dc19a9b92 100644 (file)
@@ -1802,7 +1802,11 @@ class InfoExtractor(object):
             return is_plain_url, formats
 
         entries = []
-        for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+        media_tags = [(media_tag, media_type, '')
+                      for media_tag, media_type
+                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
                 'subtitles': {},