[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / iprima.py
index 11a6629d249e3bd849475b16a00b0efa4bda283d..648ae6741f11f27ae297aed06f41b0584972b395 100644 (file)
@@ -12,16 +12,26 @@ from ..utils import (
 
 
 class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:play|prima|www)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _GEO_BYPASS = False
 
     _TESTS = [{
-        'url': 'http://play.iprima.cz/gondici-s-r-o-33',
+        'url': 'https://prima.iprima.cz/particka/92-epizoda',
         'info_dict': {
-            'id': 'p136534',
+            'id': 'p51388',
             'ext': 'mp4',
-            'title': 'Gondíci s. r. o. (34)',
-            'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
+            'title': 'Partička (92)',
+            'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 download
+        },
+    }, {
+        'url': 'https://cnn.iprima.cz/videa/70-epizoda',
+        'info_dict': {
+            'id': 'p681554',
+            'ext': 'mp4',
+            'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
         },
         'params': {
             'skip_download': True,  # m3u8 download
@@ -44,6 +54,21 @@ class IPrimaIE(InfoExtractor):
     }, {
         'url': 'http://www.iprima.cz/filmy/desne-rande',
         'only_matching': True,
+    }, {
+        'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
+        'only_matching': True,
+    }, {
+        'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
+        'only_matching': True,
+    }, {
+        'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
+        'only_matching': True,
+    }, {
+        'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
+        'only_matching': True,
+    }, {
+        'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -53,9 +78,16 @@ class IPrimaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        title = self._og_search_title(
+            webpage, default=None) or self._search_regex(
+            r'<h1>([^<]+)', webpage, 'title')
+
         video_id = self._search_regex(
             (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
-             r'data-product="([^"]+)">'),
+             r'data-product="([^"]+)">',
+             r'id=["\']player-(p\d+)"',
+             r'playerId\s*:\s*["\']player-(p\d+)',
+             r'\bvideos\s*=\s*["\'](p\d+)'),
             webpage, 'real id')
 
         playerpage = self._download_webpage(
@@ -110,8 +142,8 @@ class IPrimaIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
             'formats': formats,
-            'description': self._og_search_description(webpage),
+            'description': self._og_search_description(webpage, default=None),
         }