projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
815482d
)
[extractor/common] Speed-up media tags regex (closes #11979)
author
Sergey M․
<dstftw@gmail.com>
Sun, 5 Feb 2017 17:20:30 +0000
(
00:20
+0700)
committer
Sergey M․
<dstftw@gmail.com>
Sun, 5 Feb 2017 17:20:30 +0000
(
00:20
+0700)
youtube_dl/extractor/common.py
patch
|
blob
|
history
diff --git
a/youtube_dl/extractor/common.py
b/youtube_dl/extractor/common.py
index 2c8ec1417c21cb9397e34efe8dec7e0e5bca9e62..5191886227b085e7c082f8c577d8082fe4625cbb 100644
(file)
--- a/
youtube_dl/extractor/common.py
+++ b/
youtube_dl/extractor/common.py
@@
-1959,7
+1959,12
@@
class InfoExtractor(object):
media_tags = [(media_tag, media_type, '')
for media_tag, media_type
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
media_tags = [(media_tag, media_type, '')
for media_tag, media_type
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
- media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+ media_tags.extend(re.findall(
+ # We only allow video|audio followed by a whitespace or '>'.
+ # Allowing more characters may end up in significant slow down (see
+ # https://github.com/rg3/youtube-dl/issues/11979, example URL:
+ # http://www.porntrex.com/maps/videositemap.xml).
+ r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],
for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],