X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftheplatform.py;h=de236bbba899837f87a748cb7aab6cb8182b77c4;hb=485047854376465f95309daad4966971f56728ef;hp=0405bd6b0f66a65e7278d74f555f43c1c5beeed7;hpb=f5a723a78a2d4e395fca89e5b3bed53334b9385e;p=youtube-dl diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 0405bd6b0..de236bbba 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -80,14 +80,33 @@ class ThePlatformBaseIE(OnceIE): 'url': src, }) + duration = info.get('duration') + tp_chapters = info.get('chapters', []) + chapters = [] + if tp_chapters: + def _add_chapter(start_time, end_time): + start_time = float_or_none(start_time, 1000) + end_time = float_or_none(end_time, 1000) + if start_time is None or end_time is None: + return + chapters.append({ + 'start_time': start_time, + 'end_time': end_time, + }) + + for chapter in tp_chapters[:-1]: + _add_chapter(chapter.get('startTime'), chapter.get('endTime')) + _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration) + return { 'title': info['title'], 'subtitles': subtitles, 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], - 'duration': int_or_none(info.get('duration'), 1000), + 'duration': float_or_none(duration, 1000), 'timestamp': int_or_none(info.get('pubDate'), 1000) or None, 'uploader': info.get('billingCode'), + 'chapters': chapters, } def _extract_theplatform_metadata(self, path, video_id): @@ -156,7 +175,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): 'title': 'iPhone Siri’s sassy response to a math question has people talking', 'description': 'md5:a565d1deadd5086f3331d57298ec6333', 'duration': 83.0, - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1435752600, 'upload_date': '20150701', 'uploader': 'NBCU-NEWS', @@ -179,10 +198,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): if m: return [m.group('url')] + # Are whitesapces ignored in URLs? + # https://github.com/rg3/youtube-dl/issues/12044 matches = re.findall( - r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) if matches: - return list(zip(*matches))[1] + return [re.sub(r'\s', '', list(zip(*matches))[1][0])] @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): @@ -297,7 +318,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): 'ext': 'mp4', 'title': 'The Biden factor: will Joe run in 2016?', 'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140208', 'timestamp': 1391824260, 'duration': 467.0, @@ -306,9 +327,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE): }, }] - def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}): + def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None): real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query) entry = self._download_json(real_url, video_id)['entries'][0] + main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None formats = [] subtitles = {} @@ -333,7 +355,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): if asset_type in asset_types_query: query.update(asset_types_query[asset_type]) cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query( - smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type) + main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type) formats.extend(cur_formats) subtitles = self._merge_subtitles(subtitles, cur_subtitles)