[ard] Remove SportschauIE, which is now based on WDR (#9799)
[youtube-dl] / youtube_dl / extractor / vrt.py
index 57ef8dc300355f1c49b2be0a2ee4e4fa27b4e4ca..8e35f24e81e7e61240898ea3259914737365fd2f 100644 (file)
@@ -4,11 +4,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+    determine_ext,
+    float_or_none,
+)
 
 
 class VRTIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
+    _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
     _TESTS = [
         # deredactie.be
         {
@@ -36,7 +39,7 @@ class VRTIE(InfoExtractor):
                 'timestamp': 1413835980.560,
                 'upload_date': '20141020',
                 'duration': 3238,
-            }  
+            }
         },
         # cobra.be
         {
@@ -52,6 +55,15 @@ class VRTIE(InfoExtractor):
                 'duration': 661,
             }
         },
+        {
+            # YouTube video
+            'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
@@ -62,18 +74,37 @@ class VRTIE(InfoExtractor):
         video_id = self._search_regex(
             r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
 
+        src = self._search_regex(
+            r'data-video-src="([^"]+)"', webpage, 'video src', default=None)
+
+        video_type = self._search_regex(
+            r'data-video-type="([^"]+)"', webpage, 'video type', default=None)
+
+        if video_type == 'YouTubeVideo':
+            return self.url_result(src, 'Youtube')
+
         formats = []
+
         mobj = re.search(
             r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
             webpage)
         if mobj:
             formats.extend(self._extract_m3u8_formats(
                 '%s/%s' % (mobj.group('server'), mobj.group('path')),
-                video_id, 'mp4'))
-        mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage)
-        if mobj:
-            formats.extend(self._extract_f4m_formats(
-                '%s/manifest.f4m' % mobj.group('src'), video_id))
+                video_id, 'mp4', m3u8_id='hls', fatal=False))
+
+        if src:
+            if determine_ext(src) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.extend(self._extract_f4m_formats(
+                    '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))
+
+        if not formats and 'data-video-geoblocking="true"' in webpage:
+            self.raise_geo_restricted('This video is only available in Belgium')
+
         self._sort_formats(formats)
 
         title = self._og_search_title(webpage)
@@ -92,4 +123,4 @@ class VRTIE(InfoExtractor):
             'timestamp': timestamp,
             'duration': duration,
             'formats': formats,
-        }
\ No newline at end of file
+        }