[arte.tv:+7] Fix extraction (fixes #8427)
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 4 Feb 2016 19:16:47 +0000 (20:16 +0100)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Thu, 4 Feb 2016 19:16:47 +0000 (20:16 +0100)
youtube_dl/extractor/arte.py

index b9e07f0ef9e6d173cc04e5b7831691a4b343409a..6ed855a579479dc868f7854f46ade1f1e11dda11 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     unified_strdate,
     get_element_by_attribute,
     int_or_none,
+    NO_DEFAULT,
     qualities,
 )
 
@@ -93,9 +94,18 @@ class ArteTVPlus7IE(InfoExtractor):
         json_url = self._html_search_regex(
             patterns, webpage, 'json vp url', default=None)
         if not json_url:
-            iframe_url = self._html_search_regex(
-                r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
-                webpage, 'iframe url', group='url')
+            def find_iframe_url(webpage, default=NO_DEFAULT):
+                return self._html_search_regex(
+                    r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
+                    webpage, 'iframe url', group='url', default=default)
+
+            iframe_url = find_iframe_url(webpage, None)
+            if not iframe_url:
+                embed_url = self._html_search_regex(
+                    r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
+                player = self._download_json(
+                    embed_url, video_id, 'Downloading player page')
+                iframe_url = find_iframe_url(player['html'])
             json_url = compat_parse_qs(
                 compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
         return self._extract_from_json_url(json_url, video_id, lang)