X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcanalplus.py;h=d8bf073f40cf171d547992679d295055f82ef386;hb=2501d41ef4b9ed0349cf4f9838e12873350e60d5;hp=8d0f9115800fe45c4d30812fe49b3a89aef6c648;hpb=74193838f71addcb08a9f56a7fad8c2e7df298ec;p=youtube-dl diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 8d0f91158..d8bf073f4 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,11 +6,13 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( - ExtractorError, - HEADRequest, - unified_strdate, - qualities, + dict_get, + # ExtractorError, + # HEADRequest, int_or_none, + qualities, + remove_end, + unified_strdate, ) @@ -23,6 +25,9 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?c8\.fr| + (?:www\.)?d17\.tv| + (?:(?:football|www)\.)?cstar\.fr| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P[^/?#&]+))?(?:\?.*\bvid=(?P\d+))?| player\.canalplus\.fr/#/(?P\d+) @@ -34,68 +39,95 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'c8': 'd8', + 'd17': 'd17', + 'cstar': 'd17', 'itele': 'itele', } + # Only works for direct mp4 URLs + _GEO_COUNTRIES = ['FR'] + _TESTS = [{ - 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', - 'md5': '12164a6f14ff6df8bd628e8ba9b10b78', + 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', 'info_dict': { - 'id': '1263092', + 'id': '1405510', + 'display_id': 'pid1830-c-zapping', 'ext': 'mp4', - 'title': 'Le Zapping - 13/05/15', - 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', - 'upload_date': '20150513', + 'title': 'Zapping - 02/07/2016', + 'description': 'Le meilleur de toutes les chaînes, tous les jours', + 'upload_date': '20160702', }, }, { + # geo restricted, bypassed 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', 'info_dict': { 'id': '1108190', - 'ext': 'flv', - 'title': 'Le labyrinthe - Boing super ranger', + 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger', + 'ext': 'mp4', + 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe', 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff', 'upload_date': '20140724', }, - 'skip': 'Only works from France', + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { + # geo restricted, bypassed + 'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684', + 'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d', + 'info_dict': { + 'id': '1443684', + 'display_id': 'pid6318-videos-integrales', + 'ext': 'mp4', + 'title': 'Guess my iep ! - TPMP - 07/04/2017', + 'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa', + 'upload_date': '20170407', + }, + 'expected_warnings': ['HTTP Error 403: Forbidden'], }, { - 'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', + 'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'info_dict': { - 'id': '966289', - 'ext': 'flv', - 'title': 'Campagne intime - Documentaire exceptionnel', - 'description': 'md5:d2643b799fb190846ae09c61e59a859f', - 'upload_date': '20131108', + 'id': '1420176', + 'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', + 'ext': 'mp4', + 'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ', + 'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.', + 'upload_date': '20161014', }, - 'skip': 'videos get deleted after a while', }, { - 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', - 'md5': '38b8f7934def74f0d6f3ba6c036a5f82', + 'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769', 'info_dict': { - 'id': '1213714', + 'id': '1416769', + 'display_id': 'pid7566-feminines-videos', 'ext': 'mp4', - 'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', - 'description': 'md5:8216206ec53426ea6321321f3b3c16db', - 'upload_date': '20150211', + 'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016', + 'description': 'md5:c3f30f2aaac294c1c969b3294de6904e', + 'upload_date': '20160921', + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://m.canalplus.fr/?vid=1398231', 'only_matching': True, + }, { + 'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid') site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]] # Beware, some subclasses do not define an id group - display_id = mobj.group('display_id') or video_id + display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html') - if video_id is None: - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)'], - webpage, 'video id', group='id') + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + [r']+?videoId=(["\'])(?P\d+)', + r'id=["\']canal_video_player(?P\d+)', + r'data-video=["\'](?P\d+)'], + webpage, 'video id', default=mobj.group('vid'), group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') @@ -107,15 +139,15 @@ class CanalplusIE(InfoExtractor): preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD']) - fmt_url = next(iter(media.get('VIDEOS'))) - if '/geo' in fmt_url.lower(): - response = self._request_webpage( - HEADRequest(fmt_url), video_id, - 'Checking if the video is georestricted') - if '/blocage' in response.geturl(): - raise ExtractorError( - 'The video is not available in your country', - expected=True) + # _, fmt_url = next(iter(media['VIDEOS'].items())) + # if '/geo' in fmt_url.lower(): + # response = self._request_webpage( + # HEADRequest(fmt_url), video_id, + # 'Checking if the video is georestricted') + # if '/blocage' in response.geturl(): + # raise ExtractorError( + # 'The video is not available in your country', + # expected=True) formats = [] for format_id, format_url in media['VIDEOS'].items():