[iprima] Improve extraction (closes #25138)
author: Sergey M. <dstftw@gmail.com>
Wed, 6 May 2020 16:20:14 +0000 (23:20 +0700)
committer: Sergey M. <dstftw@gmail.com>
Wed, 6 May 2020 16:20:14 +0000 (23:20 +0700)
youtube_dl/extractor/iprima.py

index 11bbeb5922a9d85e05977196c822a076c8b45ec3..53a550c11e4407813deb12f646a0c714436862b5 100644 (file)
@@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor):
     _GEO_BYPASS = False
 
     _TESTS = [{
-        'url': 'http://play.iprima.cz/gondici-s-r-o-33',
+        'url': 'https://prima.iprima.cz/particka/92-epizoda',
         'info_dict': {
-            'id': 'p136534',
+            'id': 'p51388',
             'ext': 'mp4',
-            'title': 'Gondíci s. r. o. (34)',
-            'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
+            'title': 'Partička (92)',
+            'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 download
+        },
+    }, {
+        'url': 'https://cnn.iprima.cz/videa/70-epizoda',
+        'info_dict': {
+            'id': 'p681554',
+            'ext': 'mp4',
+            'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
         },
         'params': {
             'skip_download': True,  # m3u8 download
@@ -68,9 +78,15 @@ class IPrimaIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        title = self._og_search_title(
+            webpage, default=None) or self._search_regex(
+            r'<h1>([^<]+)', webpage, 'title')
+
         video_id = self._search_regex(
             (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
-             r'data-product="([^"]+)">'),
+             r'data-product="([^"]+)">',
+             r'id=["\']player-(p\d+)"',
+             r'playerId\s*:\s*["\']player-(p\d+)'),
             webpage, 'real id')
 
         playerpage = self._download_webpage(
@@ -125,8 +141,8 @@ class IPrimaIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
             'formats': formats,
-            'description': self._og_search_description(webpage),
+            'description': self._og_search_description(webpage, default=None),
         }