X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fciscolive.py;h=da404e4dc57b997834a61d217b39e84d497db173;hb=HEAD;hp=32f64571379c9f3d9a810a7b065517dc2dfcf201;hpb=6a6d7f064178427d28986884524bd3434f0ca957;p=youtube-dl diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 32f645713..da404e4dc 100644 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools + from .common import InfoExtractor from ..compat import ( compat_parse_qs, @@ -29,12 +31,12 @@ class CiscoLiveBaseIE(InfoExtractor): 'rfWidgetId': RAINFOCUS_WIDGET_ID, } - def _call_api(self, ep, rf_id, query, referrer): + def _call_api(self, ep, rf_id, query, referrer, note=None): headers = self.HEADERS.copy() headers['Referer'] = referrer return self._download_json( - self.RAINFOCUS_API_URL % ep, rf_id, data=urlencode_postdata(query), - headers=headers) + self.RAINFOCUS_API_URL % ep, rf_id, note=note, + data=urlencode_postdata(query), headers=headers) def _parse_rf_item(self, rf_item): event_name = rf_item.get('eventName') @@ -63,8 +65,8 @@ class CiscoLiveBaseIE(InfoExtractor): class CiscoLiveSessionIE(CiscoLiveBaseIE): - _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P<id>[^/?&]+)' + _TESTS = [{ 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', 'md5': 'c98acf395ed9c9f766941c70f5352e22', 'info_dict': { @@ -77,10 +79,13 @@ class CiscoLiveSessionIE(CiscoLiveBaseIE): 'uploader_id': '5647924234001', 'location': '16B Mezz.', }, - 'params': { - 'proxy': '127.0.0.1:8118', - } - } + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU', + 'only_matching': True, + }, { + 'url': 
'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS', + 'only_matching': True, + }] def _real_extract(self, url): rf_id = self._match_id(url) @@ -89,19 +94,19 @@ class CiscoLiveSessionIE(CiscoLiveBaseIE): class CiscoLiveSearchIE(CiscoLiveBaseIE): - _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/' + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)' _TESTS = [{ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', 'info_dict': { - 'title': 'Filter query', + 'title': 'Search query', }, 'playlist_count': 5, - 'params': { - 'proxy': '127.0.0.1:8118', - } }, { 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', 'only_matching': True, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/', + 'only_matching': True, }] @classmethod @@ -112,15 +117,35 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): def _check_bc_id_exists(rf_item): return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None + def _entries(self, query, url): + query['size'] = 50 + query['from'] = 0 + for page_num in itertools.count(1): + results = self._call_api( + 'search', None, query, url, + 'Downloading search JSON page %d' % page_num) + sl = try_get(results, lambda x: x['sectionList'][0], dict) + if sl: + results = sl + items = results.get('items') + if not items or not isinstance(items, list): + break + for item in items: + if not isinstance(item, dict): + continue + if not 
self._check_bc_id_exists(item): + continue + yield self._parse_rf_item(item) + size = int_or_none(results.get('size')) + if size is not None: + query['size'] = size + total = int_or_none(results.get('total')) + if total is not None and query['from'] + query['size'] > total: + break + query['from'] += query['size'] + def _real_extract(self, url): - rf_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - rf_query['type'] = 'session' - rf_query['size'] = 1000 - rf_results = self._call_api('search', None, rf_query, url) - entries = [ - self._parse_rf_item(rf_item) - for rf_item - in rf_results['sectionList'][0]['items'] - if self._check_bc_id_exists(rf_item) - ] - return self.playlist_result(entries, playlist_title='Filter query') + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + query['type'] = 'session' + return self.playlist_result( + self._entries(query, url), playlist_title='Search query')