[wdr] Relax media link regex (closes #14447)
[youtube-dl] / youtube_dl / extractor / wdr.py
index 11099982729ffddea6b2b5b56c5063f95ad5c4ec..621de1e1efb73a9a377a46fe0fa702e595c3cde5 100644 (file)
@@ -19,10 +19,16 @@ class WDRBaseIE(InfoExtractor):
     def _extract_wdr_video(self, webpage, display_id):
         # for wdr.de the data-extension is in a tag with the class "mediaLink"
         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
-        # for wdrmaus it is in a link to the page in a multiline "videoLink"-tag
+        # for wdrmaus, in a tag with the class "videoButton" (previously a link
+        # to the page in a multiline "videoLink"-tag)
         json_metadata = self._html_search_regex(
-            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
-            webpage, 'media link', default=None, flags=re.MULTILINE)
+            r'''(?sx)class=
+                    (?:
+                        (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+|
+                        (["\'])videoLink\b.*?\2[\s]*>\n[^\n]*
+                    )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3
+            ''',
+            webpage, 'media link', default=None, group='data')
 
         if not json_metadata:
             return
@@ -32,7 +38,7 @@ class WDRBaseIE(InfoExtractor):
         jsonp_url = media_link_obj['mediaObj']['url']
 
         metadata = self._download_json(
-            jsonp_url, 'metadata', transform_source=strip_jsonp)
+            jsonp_url, display_id, transform_source=strip_jsonp)
 
         metadata_tracker_data = metadata['trackerData']
         metadata_media_resource = metadata['mediaResource']