From: Sergey M․ Date: Fri, 17 Jul 2015 16:59:17 +0000 (+0600) Subject: Merge branch 'atomicdryad-pr-crashfix_compat_urllib_unquote' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=13fc7f3a05e0cb8b111e62540a4795611cc0a9dd;hp=14309e1ddc476a7e2fc444a0443b2fc23186a385;p=youtube-dl Merge branch 'atomicdryad-pr-crashfix_compat_urllib_unquote' --- diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 271bf8596..5a2d0d995 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -836,13 +836,14 @@ class InfoExtractor(object): self.to_screen(msg) time.sleep(timeout) - def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None): + def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, + transform_source=lambda s: fix_xml_ampersands(s).strip()): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244) - transform_source=lambda s: fix_xml_ampersands(s).strip()) + transform_source=transform_source) formats = [] manifest_version = '1.0' diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py index e97339121..663e6632a 100644 --- a/youtube_dl/extractor/howstuffworks.py +++ b/youtube_dl/extractor/howstuffworks.py @@ -10,7 +10,7 @@ from ..utils import ( class HowStuffWorksIE(InfoExtractor): - _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm' + _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm' _TESTS = [ { 'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm', @@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, }, + { + 'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm', + 'only_matching': True, + } ] def _real_extract(self, url):