X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Futils.py;h=463804e183117b23efd8f0e4b0ad9b132e519ddb;hb=3bb6165927c277c3af73d5ef1ffb6ce9ea663d10;hp=c18c9beedb85904578dad654c624d97ccc1f68a0;hpb=056d857571158264aefb8d9f7d47c0dad768be63;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c18c9beed..463804e18 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -298,7 +298,8 @@ def clean_html(html): """Clean an HTML snippet into a readable string""" # Newline vs
html = html.replace('\n', ' ') - html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html) + html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) + html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) # Strip html tags html = re.sub('<.*?>', '', html) # Replace html entities