[extractor/common] Support HTML media elements without child nodes
author Yen Chi Hsuan <yan12125@gmail.com>
Tue, 11 Oct 2016 17:40:28 +0000 (01:40 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Tue, 11 Oct 2016 17:40:28 +0000 (01:40 +0800)
ChangeLog
youtube_dl/extractor/common.py

index 9a7e7133b44061505b197f0c6270808d176d7f0e..49488c8881d09bcafbce3b6168019238e8f8a969 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 version <unreleased>
 
 Core
++ Support HTML media elements without child nodes
 * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
 
 Extractors
index da192728f182dbe38c754337c83d5766112f123d..431cef831ab99fc726a0d6efc616d08dc19a9b92 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1802,7 +1802,11 @@ class InfoExtractor(object):
             return is_plain_url, formats
 
         entries = []
-        for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+        media_tags = [(media_tag, media_type, '')
+                      for media_tag, media_type
+                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
                 'subtitles': {},