[rentv] Improve extraction (closes #15227)
author Sergey M․ <dstftw@gmail.com>
Sat, 21 Apr 2018 16:22:10 +0000 (23:22 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sat, 21 Apr 2018 16:22:30 +0000 (23:22 +0700)
youtube_dl/extractor/rentv.py

index df528b09e01abc1d53c146f021ac2ea9341e697c..8bcf87126b18dd82f4c08bc7a9c586b0f99f112e 100644 (file)
@@ -3,6 +3,10 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)
 
 
 class RENTVIE(InfoExtractor):
@@ -13,7 +17,9 @@ class RENTVIE(InfoExtractor):
         'info_dict': {
             'id': '118577',
             'ext': 'mp4',
-            'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
+            'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"',
+            'timestamp': 1472230800,
+            'upload_date': '20160826',
         }
     }, {
         'url': 'http://ren.tv/player/118577',
@@ -27,18 +33,31 @@ class RENTVIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
         config = self._parse_json(self._search_regex(
-            r'config\s*=\s*({.+});', webpage, 'config'), video_id)
+            r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id)
+        title = config['title']
         formats = []
-        for video in config.get('src', ''):
-            formats.append({
-                'url': video.get('src', '')
-            })
+        for video in config['src']:
+            src = video.get('src')
+            if not src or not isinstance(src, compat_str):
+                continue
+            ext = determine_ext(src)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'url': src,
+                })
         self._sort_formats(formats)
         return {
             'id': video_id,
+            'title': title,
+            'description': config.get('description'),
+            'thumbnail': config.get('image'),
+            'duration': int_or_none(config.get('duration')),
+            'timestamp': int_or_none(config.get('date')),
             'formats': formats,
-            'title': config.get('title', ''),
-            'thumbnail': config.get('image', '')
         }