From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:40:28 +0000 (+0800) Subject: [extractor/common] Support HTML media elements without child nodes X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=cea364f70c97dad933fa38698f3c9df1bdb485cf;p=youtube-dl [extractor/common] Support HTML media elements without child nodes --- diff --git a/ChangeLog b/ChangeLog index 9a7e7133b..49488c888 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ Support HTML media elements without child nodes * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da192728f..431cef831 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1802,7 +1802,11 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] - for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage): + media_tags = [(media_tag, media_type, '') + for media_tag, media_type + in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage)) + for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], 'subtitles': {},