[dfb] extract m3u8 formats
authorremitamine <remitamine@gmail.com>
Fri, 29 Apr 2016 18:21:17 +0000 (19:21 +0100)
committerremitamine <remitamine@gmail.com>
Fri, 29 Apr 2016 18:21:17 +0000 (19:21 +0100)
youtube_dl/extractor/dfb.py

index cdfeccacb447591f4dcc776a9c1a374a794fa5ba..a4d0448c26149429ebd7d5813f432b56bf0e6020 100644 (file)
@@ -12,39 +12,46 @@ class DFBIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
-        # The md5 is different each time
+        'md5': 'ac0f98a52a330f700b4b3034ad240649',
         'info_dict': {
             'id': '11633',
             'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
             'upload_date': '20150714',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
 
-        webpage = self._download_webpage(url, display_id)
         player_info = self._download_xml(
             'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
             display_id)
         video_info = player_info.find('video')
-
-        f4m_info = self._download_xml(
-            self._proto_relative_url(video_info.find('url').text.strip()), display_id)
-        token_el = f4m_info.find('token')
-        manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
-        formats = self._extract_f4m_formats(manifest_url, display_id)
+        stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
+
+        formats = []
+        # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
+        for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
+            stream_access_info = self._download_xml(sa_url, display_id)
+            token_el = stream_access_info.find('token')
+            manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
+            if '.f4m' in manifest_url:
+                formats.extend(self._extract_f4m_formats(
+                    manifest_url + '&hdcore=3.2.0',
+                    display_id, f4m_id='hds', fatal=False))
+            else:
+                formats.extend(self._extract_m3u8_formats(
+                    manifest_url, display_id, 'mp4',
+                    'm3u8_native', m3u8_id='hls', fatal=False))
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'display_id': display_id,
             'title': video_info.find('title').text,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
             'upload_date': unified_strdate(video_info.find('time_date').text),
             'formats': formats,
         }