[ciscolive] Fix issues and improve extraction (closes #17984)
author Sergey M․ <dstftw@gmail.com>
Tue, 20 Nov 2018 22:25:43 +0000 (05:25 +0700)
committer Sergey M․ <dstftw@gmail.com>
Tue, 20 Nov 2018 23:10:39 +0000 (06:10 +0700)
youtube_dl/extractor/ciscolive.py
youtube_dl/extractor/extractors.py

index 2db7aad2cd78dd15a2d4a10504111c35595a1d4e..32f64571379c9f3d9a810a7b065517dc2dfcf201 100644 (file)
@@ -1,84 +1,49 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
 from .common import InfoExtractor
 from ..compat import (
+    compat_parse_qs,
     compat_urllib_parse_urlparse,
-    compat_parse_qs
 )
 from ..utils import (
     clean_html,
+    float_or_none,
     int_or_none,
     try_get,
     urlencode_postdata,
 )
 
 
-class CiscoLiveIE(InfoExtractor):
-    IE_NAME = 'ciscolive'
-    _VALID_URL = r'(?:https?://)?ciscolive\.cisco\.com/on-demand-library/\??(?P<query>[^#]+)#/(?:session/(?P<id>.+))?$'
-    _TESTS = [
-        {
-            'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
-            'md5': 'c98acf395ed9c9f766941c70f5352e22',
-            'info_dict': {
-                'id': '5803694304001',
-                'ext': 'mp4',
-                'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
-                'description': 'md5:ec4a436019e09a918dec17714803f7cc',
-                'timestamp': 1530305395,
-                'uploader_id': '5647924234001',
-                'upload_date': '20180629',
-                'location': '16B Mezz.',
-            },
-        },
-        {
-            'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
-            'md5': '993d4cf051f6174059328b1dce8e94bd',
-            'info_dict': {
-                'upload_date': '20180629',
-                'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco',
-                'timestamp': 1530316421,
-                'uploader_id': '5647924234001',
-                'id': '5803751616001',
-                'description': 'md5:5f144575cd6848117fe2f756855b038b',
-                'location': 'WoS, DevNet Theater',
-                'ext': 'mp4',
-            },
-        },
-        {
-            'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
-            'md5': '80e0c3b87e373fe3a3316b934b8915bf',
-            'info_dict': {
-                'upload_date': '20180629',
-                'title': 'Beating the CCIE Routing & Switching',
-                'timestamp': 1530311842,
-                'uploader_id': '5647924234001',
-                'id': '5803735679001',
-                'description': 'md5:e71970799e92d7f5ff57ae23f64b0929',
-                'location': 'Tulúm 02',
-                'ext': 'mp4',
-            },
-        }
-    ]
-
+class CiscoLiveBaseIE(InfoExtractor):
     # These appear to be constant across all Cisco Live presentations
     # and are not tied to any user session or event
     RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
-    RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
-    RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
+    RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
+    RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
 
+    HEADERS = {
+        'Origin': 'https://ciscolive.cisco.com',
+        'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
+        'rfWidgetId': RAINFOCUS_WIDGET_ID,
+    }
+
+    def _call_api(self, ep, rf_id, query, referrer):
+        headers = self.HEADERS.copy()
+        headers['Referer'] = referrer
+        return self._download_json(
+            self.RAINFOCUS_API_URL % ep, rf_id, data=urlencode_postdata(query),
+            headers=headers)
+
     def _parse_rf_item(self, rf_item):
-        ''' Parses metadata and passes to Brightcove extractor '''
         event_name = rf_item.get('eventName')
         title = rf_item['title']
         description = clean_html(rf_item.get('abstract'))
         presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
         bc_id = rf_item['videos'][0]['url']
         bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
-        duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
+        duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
         location = try_get(rf_item, lambda x: x['times'][0]['room'])
 
         if duration:
@@ -86,51 +51,76 @@ class CiscoLiveIE(InfoExtractor):
 
         return {
             '_type': 'url_transparent',
-            'creator': presenter_name,
+            'url': bc_url,
+            'ie_key': 'BrightcoveNew',
+            'title': title,
             'description': description,
             'duration': duration,
-            'ie_key': 'BrightcoveNew',
+            'creator': presenter_name,
             'location': location,
             'series': event_name,
-            'title': title,
-            'url': bc_url,
         }
 
-    def _check_bc_id_exists(self, rf_item):
-        ''' Checks for the existence of a Brightcove URL in an API result '''
-        bc_id = try_get(rf_item, lambda x: x['videos'][0]['url'])
-        if bc_id:
-            if bc_id.strip().isdigit():
-                return rf_item
+
+class CiscoLiveSessionIE(CiscoLiveBaseIE):
+    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
+    _TEST = {
+        'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
+        'md5': 'c98acf395ed9c9f766941c70f5352e22',
+        'info_dict': {
+            'id': '5803694304001',
+            'ext': 'mp4',
+            'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
+            'description': 'md5:ec4a436019e09a918dec17714803f7cc',
+            'timestamp': 1530305395,
+            'upload_date': '20180629',
+            'uploader_id': '5647924234001',
+            'location': '16B Mezz.',
+        },
+        'params': {
+            'proxy': '127.0.0.1:8118',
+        }
+    }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        HEADERS = {
-            'Origin': 'https://ciscolive.cisco.com',
-            'rfApiProfileId': self.RAINFOCUS_APIPROFILEID,
-            'rfWidgetId': self.RAINFOCUS_WIDGETID,
-            'Referer': url,
+        rf_id = self._match_id(url)
+        rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
+        return self._parse_rf_item(rf_result['items'][0])
+
+
+class CiscoLiveSearchIE(CiscoLiveBaseIE):
+    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
+    _TESTS = [{
+        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
+        'info_dict': {
+            'title': 'Filter query',
+        },
+        'playlist_count': 5,
+        'params': {
+            'proxy': '127.0.0.1:8118',
         }
-        # Single session URL (single video)
-        if mobj.group('id'):
-            rf_id = mobj.group('id')
-            request = self.RAINFOCUS_API_URL % 'session'
-            data = urlencode_postdata({'id': rf_id})
-            rf_result = self._download_json(request, rf_id, data=data, headers=HEADERS)
-            rf_item = self._check_bc_id_exists(rf_result['items'][0])
-            return self._parse_rf_item(rf_item)
-        else:
-            # Filter query URL (multiple videos)
-            rf_query = compat_parse_qs((compat_urllib_parse_urlparse(url).query))
-            rf_query['type'] = 'session'
-            rf_query['size'] = 1000
-            data = urlencode_postdata(rf_query)
-            request = self.RAINFOCUS_API_URL % 'search'
-            rf_results = self._download_json(request, 'Filter query', data=data, headers=HEADERS)
-            entries = [
-                self._parse_rf_item(rf_item)
-                for rf_item
-                in rf_results['sectionList'][0]['items']
-                if self._check_bc_id_exists(rf_item)
-            ]
-            return self.playlist_result(entries, 'Filter query')
+    }, {
+        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
+
+    @staticmethod
+    def _check_bc_id_exists(rf_item):
+        return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
+
+    def _real_extract(self, url):
+        rf_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        rf_query['type'] = 'session'
+        rf_query['size'] = 1000
+        rf_results = self._call_api('search', None, rf_query, url)
+        entries = [
+            self._parse_rf_item(rf_item)
+            for rf_item
+            in rf_results['sectionList'][0]['items']
+            if self._check_bc_id_exists(rf_item)
+        ]
+        return self.playlist_result(entries, playlist_title='Filter query')
index 2c5988a142e431bd9354ac00b1e3c10f991d3c43..60e6175b1dffecc605f3c1f97f93742cd92ffd09 100644 (file)
@@ -194,7 +194,10 @@ from .chirbit import (
     ChirbitProfileIE,
 )
 from .cinchcast import CinchcastIE
-from .ciscolive import CiscoLiveIE
+from .ciscolive import (
+    CiscoLiveSessionIE,
+    CiscoLiveSearchIE,
+)
 from .cjsw import CJSWIE
 from .cliphunter import CliphunterIE
 from .clippit import ClippitIE