[extractor/common] Handle malformed f4m manifests
author Sergey M․ <dstftw@gmail.com>
Wed, 15 Jul 2015 19:14:08 +0000 (01:14 +0600)
committer Sergey M․ <dstftw@gmail.com>
Wed, 15 Jul 2015 19:14:08 +0000 (01:14 +0600)
youtube_dl/extractor/common.py

index 3a396c0b0ae30032fe267a5ab88561b428e123d2..f8a5eccedd78bf23304866c47f3ea000e9e60245 100644 (file)
@@ -28,6 +28,7 @@ from ..utils import (
     clean_html,
     compiled_regex_type,
     ExtractorError,
+    fix_xml_ampersands,
     float_or_none,
     int_or_none,
     RegexNotFoundError,
@@ -837,7 +838,10 @@ class InfoExtractor(object):
     def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
         manifest = self._download_xml(
             manifest_url, video_id, 'Downloading f4m manifest',
-            'Unable to download f4m manifest')
+            'Unable to download f4m manifest',
+            # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+            # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+            transform_source=lambda s: fix_xml_ampersands(s).strip())
 
         formats = []
         manifest_version = '1.0'