From c8e170b2092f5e2ad9ea8fd7fb2eedd35e307a1c Mon Sep 17 00:00:00 2001
From: =?utf8?q?Sergey=20M=E2=80=A4?=
Date: Sat, 23 Jul 2016 17:56:11 +0700
Subject: [PATCH] [lcp] Improve extraction

---
 youtube_dl/extractor/lcp.py | 81 ++++++++++++++++++++++++++++++-------
 1 file changed, 66 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index 38d7502df..ade27a99e 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -1,39 +1,90 @@
 # coding: utf-8
 from __future__ import unicode_literals
+
 from .common import InfoExtractor
+from .arkena import ArkenaIE
+
+
+class LcpPlayIE(ArkenaIE):
+    _VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
+    _TESTS = [{
+        'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
+        'md5': 'b8bd9298542929c06c1c15788b1f277a',
+        'info_dict': {
+            'id': '327336',
+            'ext': 'mp4',
+            'title': '327336',
+            'timestamp': 1456391602,
+            'upload_date': '20160225',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
 
 class LcpIE(InfoExtractor):
-    IE_NAME = 'LCP'
-    _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^\/]+/)*(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'
 
     _TESTS = [{
+        # arkena embed
         'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
-        'md5': 'ab96c4dae94322ece1e98d97c8dc7807',
+        'md5': 'b8bd9298542929c06c1c15788b1f277a',
         'info_dict': {
             'id': 'd56d03e9',
-            'url': 're:http://httpod.scdn.arkena.com/11970/d56d03e9_.*',
             'ext': 'mp4',
-            'title': 'd56d03e9',
+            'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
+            'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
+            'timestamp': 1456488895,
             'upload_date': '20160226',
-            'timestamp': 1456488895
-        }
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
+        # dailymotion live stream
         'url': 'http://www.lcp.fr/le-direct',
         'info_dict': {
-            'title': 'Le direct | LCP Assembl\xe9e nationale',
-            'id': 'le-direct',
+            'id': 'xji3qy',
+            'ext': 'mp4',
+            'title': 'La Chaine Parlementaire (LCP), Live TNT',
+            'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
+            'uploader': 'LCP',
+            'uploader_id': 'xbz33d',
+            'timestamp': 1308923058,
+            'upload_date': '20110624',
+        },
+        'params': {
+            # m3u8 live stream
+            'skip_download': True,
         },
-        'playlist_mincount': 1
+    }, {
+        'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+
         webpage = self._download_webpage(url, display_id)
 
-        embed_url_regex = r'"(?P<url>(?:https?://(?:www\.)?)?play\.lcp\.fr/embed/[A-za-z0-9]+/[A-za-z0-9]+/[A-za-z0-9]+/[A-za-z0-9]+)"'
-        embed_url = self._html_search_regex(embed_url_regex, webpage, 'player_url', default=None, fatal=False)
-        if not embed_url:
+        play_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL,
+            webpage, 'play iframe', default=None, group='url')
+
+        if not play_url:
             return self.url_result(url, 'Generic')
 
-        title = self._og_search_title(webpage, default=None)
-        return self.url_result(embed_url, 'ArkenaPlay', video_id=display_id, video_title=title)
+        title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+            'twitter:title', webpage, fatal=True)
+        description = self._html_search_meta(
+            ('description', 'twitter:description'), webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': LcpPlayIE.ie_key(),
+            'url': play_url,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+        }
-- 
2.39.5