[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / noz.py
index 0ffb44b47eb673821f013e6696d495226711c2dd..ccafd77232b5f1e69f61dd5a99e904c7775a51a7 100644 (file)
@@ -2,10 +2,15 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_xpath,
+)
 from ..utils import (
     int_or_none,
+    find_xpath_attr,
     xpath_text,
+    update_url_query,
 )
 
 
@@ -19,7 +24,7 @@ class NozIE(InfoExtractor):
             'duration': 215,
             'title': '3:2 - Deutschland gewinnt Badminton-Länderspiel in Melle',
             'description': 'Vor rund 370 Zuschauern gewinnt die deutsche Badminton-Nationalmannschaft am Donnerstag ein EM-Vorbereitungsspiel gegen Frankreich in Melle. Video Moritz Frankenberg.',
-            'thumbnail': 're:^http://.*\.jpg',
+            'thumbnail': r're:^http://.*\.jpg',
         },
     }]
 
@@ -45,18 +50,33 @@ class NozIE(InfoExtractor):
         duration = int_or_none(xpath_text(
             doc, './/article/movie/file/duration'))
         formats = []
-        for qnode in doc.findall('.//article/movie/file/qualities/qual'):
-            video_node = qnode.find('./html_urls/video_url[@format="video/mp4"]')
-            if video_node is None:
-                continue  # auto
-            formats.append({
-                'url': video_node.text,
-                'format_name': xpath_text(qnode, './name'),
-                'format_id': xpath_text(qnode, './id'),
-                'height': int_or_none(xpath_text(qnode, './height')),
-                'width': int_or_none(xpath_text(qnode, './width')),
-                'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
-            })
+        for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')):
+            http_url_ele = find_xpath_attr(
+                qnode, './html_urls/video_url', 'format', 'video/mp4')
+            http_url = http_url_ele.text if http_url_ele is not None else None
+            if http_url:
+                formats.append({
+                    'url': http_url,
+                    'format_name': xpath_text(qnode, './name'),
+                    'format_id': '%s-%s' % ('http', xpath_text(qnode, './id')),
+                    'height': int_or_none(xpath_text(qnode, './height')),
+                    'width': int_or_none(xpath_text(qnode, './width')),
+                    'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
+                })
+            else:
+                f4m_url = xpath_text(qnode, 'url_hd2')
+                if f4m_url:
+                    formats.extend(self._extract_f4m_formats(
+                        update_url_query(f4m_url, {'hdcore': '3.4.0'}),
+                        video_id, f4m_id='hds', fatal=False))
+                m3u8_url_ele = find_xpath_attr(
+                    qnode, './html_urls/video_url',
+                    'format', 'application/vnd.apple.mpegurl')
+                m3u8_url = m3u8_url_ele.text if m3u8_url_ele is not None else None
+                if m3u8_url:
+                    formats.extend(self._extract_m3u8_formats(
+                        m3u8_url, video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
         self._sort_formats(formats)
 
         return {