[prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
authorPhilipp Stehle <anderschwiedu@googlemail.com>
Thu, 23 Apr 2020 19:44:13 +0000 (21:44 +0200)
committerGitHub <noreply@github.com>
Thu, 23 Apr 2020 19:44:13 +0000 (02:44 +0700)
youtube_dl/extractor/prosiebensat1.py

index 1bc4f9b6b3a455e027f1e676f24c4f352888a0c4..74074606e75804fb2a1c77d1a2ad0a483a0727be 100644 (file)
@@ -175,7 +175,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
                         (?:
                             (?:beta\.)?
                             (?:
-                                prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
+                                prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
                             )\.(?:de|at|ch)|
                             ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
                         )
@@ -193,7 +193,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
             'info_dict': {
                 'id': '2104602',
                 'ext': 'mp4',
-                'title': 'Episode 18 - Staffel 2',
+                'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
                 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                 'upload_date': '20131231',
                 'duration': 5845.04,
@@ -300,7 +300,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
             'info_dict': {
                 'id': '2572814',
                 'ext': 'mp4',
-                'title': 'Andreas Kümmert: Rocket Man',
+                'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
                 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                 'upload_date': '20131017',
                 'duration': 469.88,
@@ -310,7 +310,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
             },
         },
         {
-            'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
+            'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
             'info_dict': {
                 'id': '2156342',
                 'ext': 'mp4',
@@ -332,19 +332,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
             'playlist_count': 2,
             'skip': 'This video is unavailable',
         },
-        {
-            'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
-            'info_dict': {
-                'id': '4187506',
-                'ext': 'mp4',
-                'title': 'Best of Circus HalliGalli',
-                'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
-                'upload_date': '20151229',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
         {
             # title in <h2 class="subtitle">
             'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
@@ -421,7 +408,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
         r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
     ]
     _UPLOAD_DATE_REGEXES = [
-        r'<meta property="og:published_time" content="(.+?)">',
         r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
         r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
         r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
@@ -451,8 +437,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
         if description is None:
             description = self._og_search_description(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
-        upload_date = unified_strdate(self._html_search_regex(
-            self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
+        upload_date = unified_strdate(
+            self._html_search_meta('og:published_time', webpage,
+                                   'upload date', default=None)
+            or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
+                                       webpage, 'upload date', default=None))
 
         info.update({
             'id': clip_id,