[common] add helper method for Wowza Streaming Engine format extraction
author Remita Amine <remitamine@gmail.com>
Fri, 16 Sep 2016 18:30:38 +0000 (19:30 +0100)
committer Remita Amine <remitamine@gmail.com>
Fri, 16 Sep 2016 18:30:38 +0000 (19:30 +0100)
youtube_dl/extractor/common.py
youtube_dl/extractor/vier.py
youtube_dl/extractor/vodplatform.py
youtube_dl/extractor/vrt.py

index 4f738b9fcf4d3cbfd553b1faaa8bced4e0acb5a3..c0002345870a4490cd3d5904d148b3d272813f28 100644 (file)
@@ -1795,6 +1795,49 @@ class InfoExtractor(object):
             m3u8_id='hls', fatal=False))
         return formats
 
+    def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=()):
+        """
+        Collect the formats exposed by a Wowza Streaming Engine stream URL.
+
+        url may use the http(s), rtmp or rtsp scheme; a trailing
+        manifest/playlist/jwplayer file name is removed and everything from
+        the first '?' on is ignored before the per-protocol URLs are built.
+        m3u8_entry_protocol is forwarded to _extract_m3u8_formats.
+        skip_protocols is an iterable of protocol names that must not be
+        tried: 'm3u8' and 'f4m' always apply, 'dash' and 'smil' apply to
+        SMIL streams, 'rtmp' and 'rtsp' apply to non-SMIL streams.
+
+        Returns an unsorted list of format dicts. Raises (through
+        _search_regex) when url does not look like a supported stream URL.
+        """
+        url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
+        url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
+        # Manifests are fetched over HTTP(S); keep TLS when the source URL
+        # used it instead of silently downgrading to plain http
+        http_scheme = 'https' if 'https' + url_base in url else 'http'
+        http_base_url = http_scheme + url_base
+        formats = []
+        if 'm3u8' not in skip_protocols:
+            formats.extend(self._extract_m3u8_formats(
+                http_base_url + '/playlist.m3u8', video_id, 'mp4',
+                m3u8_entry_protocol, m3u8_id='hls', fatal=False))
+        if 'f4m' not in skip_protocols:
+            formats.extend(self._extract_f4m_formats(
+                http_base_url + '/manifest.f4m',
+                video_id, f4m_id='hds', fatal=False))
+        if re.search(r'(?:/smil:|\.smil)', url_base):
+            if 'dash' not in skip_protocols:
+                formats.extend(self._extract_mpd_formats(
+                    http_base_url + '/manifest.mpd',
+                    video_id, mpd_id='dash', fatal=False))
+            if 'smil' not in skip_protocols:
+                rtmp_formats = self._extract_smil_formats(
+                    http_base_url + '/jwplayer.smil',
+                    video_id, fatal=False)
+                for rtmp_format in rtmp_formats:
+                    formats.append(rtmp_format)
+                    play_path = rtmp_format.get('play_path')
+                    if not play_path:
+                        # Without a play path no RTSP URL can be derived;
+                        # keep the entry as is rather than failing
+                        continue
+                    # The RTSP variant is the same stream addressed by a
+                    # single URL (base URL and play path joined)
+                    rtsp_format = rtmp_format.copy()
+                    rtsp_format.pop('play_path')
+                    rtsp_format.pop('ext', None)
+                    rtsp_url = '%s/%s' % (rtmp_format['url'], play_path)
+                    rtsp_format.update({
+                        'url': rtsp_url.replace('rtmp://', 'rtsp://'),
+                        'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+                        'protocol': 'rtsp',
+                    })
+                    formats.append(rtsp_format)
+        else:
+            # Single-rendition stream: the same path is offered over RTMP and RTSP
+            for protocol in ('rtmp', 'rtsp'):
+                if protocol not in skip_protocols:
+                    formats.append({
+                        'url': protocol + url_base,
+                        'format_id': protocol,
+                        'protocol': protocol,
+                    })
+        return formats
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
index 6645c6186dbff315e850f22ae793677803cbbf9b..dc142a245bcd765dc11200b8eb50fc342c5260f2 100644 (file)
@@ -48,8 +48,8 @@ class VierIE(InfoExtractor):
             [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
             webpage, 'filename')
 
-        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
-        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+        playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
+        formats = self._extract_wowza_formats(playlist_url, display_id)
         self._sort_formats(formats)
 
         title = self._og_search_title(webpage, default=display_id)
index 7bdd8b1dcd4f93f1a47050286b221e4ec30635b9..239644340384b60c8e1a80d40b50cabbd0fd2c9e 100644 (file)
@@ -25,29 +25,8 @@ class VODPlatformIE(InfoExtractor):
         title = unescapeHTML(self._og_search_title(webpage))
         hidden_inputs = self._hidden_inputs(webpage)
 
-        base_url = self._search_regex(
-            '(.*/)(?:playlist.m3u8|manifest.mpd)',
-            hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'],
-            'base url')
-        formats = self._extract_m3u8_formats(
-            base_url + 'playlist.m3u8', video_id, 'mp4',
-            'm3u8_native', m3u8_id='hls', fatal=False)
-        formats.extend(self._extract_mpd_formats(
-            base_url + 'manifest.mpd', video_id,
-            mpd_id='dash', fatal=False))
-        rtmp_formats = self._extract_smil_formats(
-            base_url + 'jwplayer.smil', video_id, fatal=False)
-        for rtmp_format in rtmp_formats:
-            rtsp_format = rtmp_format.copy()
-            rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
-            del rtsp_format['play_path']
-            del rtsp_format['ext']
-            rtsp_format.update({
-                'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
-                'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
-                'protocol': 'rtsp',
-            })
-            formats.extend([rtmp_format, rtsp_format])
+        formats = self._extract_wowza_formats(
+            hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
         self._sort_formats(formats)
 
         return {
index bec7ab327008803f8609ea0e78e7d70577556940..00c72e34684f918e68fc859ad6ffb926efa04661 100644 (file)
@@ -5,7 +5,6 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    determine_ext,
     float_or_none,
 )
 
@@ -75,7 +74,6 @@ class VRTIE(InfoExtractor):
         },
         {
             'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
-            'md5': '',
             'info_dict': {
                 'id': '2377055',
                 'ext': 'mp4',
@@ -119,39 +117,17 @@ class VRTIE(InfoExtractor):
                 video_id, 'mp4', m3u8_id='hls', fatal=False))
 
         if src:
-            if determine_ext(src) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    src, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-                formats.extend(self._extract_f4m_formats(
-                    src.replace('playlist.m3u8', 'manifest.f4m'),
-                    video_id, f4m_id='hds', fatal=False))
-                if 'data-video-geoblocking="true"' not in webpage:
-                    rtmp_formats = self._extract_smil_formats(
-                        src.replace('playlist.m3u8', 'jwplayer.smil'),
-                        video_id, fatal=False)
-                    formats.extend(rtmp_formats)
-                    for rtmp_format in rtmp_formats:
-                        rtmp_format_c = rtmp_format.copy()
-                        rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
-                        del rtmp_format_c['play_path']
-                        del rtmp_format_c['ext']
-                        http_format = rtmp_format_c.copy()
+            formats = self._extract_wowza_formats(src, video_id)
+            if 'data-video-geoblocking="true"' not in webpage:
+                for f in formats:
+                    if f['url'].startswith('rtsp://'):
+                        http_format = f.copy()
                         http_format.update({
-                            'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
-                            'format_id': rtmp_format['format_id'].replace('rtmp', 'http'),
+                            'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
+                            'format_id': f['format_id'].replace('rtsp', 'http'),
                             'protocol': 'http',
                         })
-                        rtsp_format = rtmp_format_c.copy()
-                        rtsp_format.update({
-                            'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
-                            'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
-                            'protocol': 'rtsp',
-                        })
-                        formats.extend([http_format, rtsp_format])
-            else:
-                formats.extend(self._extract_f4m_formats(
-                    '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))
+                        formats.append(http_format)
 
         if not formats and 'data-video-geoblocking="true"' in webpage:
             self.raise_geo_restricted('This video is only available in Belgium')