Merge pull request #8130 from dyn888/master
[youtube-dl] / youtube_dl / extractor / prosiebensat1.py
index 2f9d958009a8e35a9f7e764008dbee497c4b49c9..670e6950f3fcc67e78ecd466475e7317c8dfd826 100644 (file)
@@ -9,7 +9,9 @@ from ..compat import (
     compat_urllib_parse,
 )
 from ..utils import (
+    ExtractorError,
     determine_ext,
+    float_or_none,
     int_or_none,
     unified_strdate,
 )
@@ -18,14 +20,19 @@ from ..utils import (
 class ProSiebenSat1IE(InfoExtractor):
     IE_NAME = 'prosiebensat1'
     IE_DESC = 'ProSiebenSat.1 Digital'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
 
     _TESTS = [
         {
+            # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
+            # in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
+            # - malformed f4m manifest support
+            # - proper handling of URLs starting with `https?://` in 2.0 manifests
+            # - recursive child f4m manifests extraction
             'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
             'info_dict': {
                 'id': '2104602',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Episode 18 - Staffel 2',
                 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                 'upload_date': '20131231',
@@ -131,14 +138,13 @@ class ProSiebenSat1IE(InfoExtractor):
             'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
             'info_dict': {
                 'id': '2572814',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Andreas Kümmert: Rocket Man',
                 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                 'upload_date': '20131017',
                 'duration': 469.88,
             },
             'params': {
-                # rtmp download
                 'skip_download': True,
             },
         },
@@ -146,13 +152,12 @@ class ProSiebenSat1IE(InfoExtractor):
             'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
             'info_dict': {
                 'id': '2156342',
-                'ext': 'mp4',
+                'ext': 'flv',
                 'title': 'Kurztrips zum Valentinstag',
-                'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.',
+                'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
                 'duration': 307.24,
             },
             'params': {
-                # rtmp download
                 'skip_download': True,
             },
         },
@@ -165,12 +170,26 @@ class ProSiebenSat1IE(InfoExtractor):
             },
             'playlist_count': 2,
         },
+        {
+            'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
+            'info_dict': {
+                'id': '4187506',
+                'ext': 'flv',
+                'title': 'Best of Circus HalliGalli',
+                'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
+                'upload_date': '20151229',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     _CLIPID_REGEXES = [
         r'"clip_id"\s*:\s+"(\d+)"',
         r'clipid: "(\d+)"',
         r'clip[iI]d=(\d+)',
+        r'clip[iI]d\s*=\s*["\'](\d+)',
         r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
     ]
     _TITLE_REGEXES = [
@@ -179,12 +198,16 @@ class ProSiebenSat1IE(InfoExtractor):
         r'<!-- start video -->\s*<h1>(.+?)</h1>',
         r'<h1 class="att-name">\s*(.+?)</h1>',
         r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
+        r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
+        r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
     ]
     _DESCRIPTION_REGEXES = [
         r'<p itemprop="description">\s*(.+?)</p>',
         r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
         r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
         r'<p class="att-description">\s*(.+?)\s*</p>',
+        r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
+        r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
     ]
     _UPLOAD_DATE_REGEXES = [
         r'<meta property="og:published_time" content="(.+?)">',
@@ -219,10 +242,13 @@ class ProSiebenSat1IE(InfoExtractor):
             'ids': clip_id,
         })
 
-        videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')
+        video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0]
+
+        if video.get('is_protected') is True:
+            raise ExtractorError('This video is DRM protected.', expected=True)
 
-        duration = float(videos[0]['duration'])
-        source_ids = [source['id'] for source in videos[0]['sources']]
+        duration = float_or_none(video.get('duration'))
+        source_ids = [source['id'] for source in video['sources']]
         source_ids_str = ','.join(map(str, source_ids))
 
         g = '01!8d8F_)r9]4s[qeuXfP%'