X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcanalc2.py;h=c4fefefe43b250c13c3a711cf397e1a3caa046a7;hb=47e0e1e0e2306dd7119844bbbc3ef39ccb175ed1;hp=d0e2ed536c3551f5bdb7f1bd82b2ee956a554f21;hpb=cd0abcc0bb4c218fd02850a139b626d252e22599;p=youtube-dl diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index d0e2ed536..c4fefefe4 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -1,37 +1,41 @@ # coding: utf-8 -"""Extractor for canalc2.tv""" +from __future__ import unicode_literals + import re -import lxml.html from .common import InfoExtractor + class Canalc2IE(InfoExtractor): - """Extractor for canalc2.tv""" - _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui' + IE_NAME = 'canalc2.tv' + _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)' _TEST = { - u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', - u'file': u'12163.mp4', - u'md5': u'c00fa80517373764ff5c0b5eb5a58780', - u'info_dict': { - u'title': u'Terrasses du Numérique' + 'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', + 'md5': '060158428b650f896c542dfbb3d6487f', + 'info_dict': { + 'id': '12163', + 'ext': 'mp4', + 'title': 'Terrasses du Numérique' } } def _real_extract(self, url): - video_id = re.match(self._VALID_URL, url).group(1) + video_id = re.match(self._VALID_URL, url).group('id') + # We need to set the voir field for getting the file name + url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id webpage = self._download_webpage(url, video_id) - file_name = re.search(r"so\.addVariable\('file','(.*?)'\);", - webpage).group(1) - + file_name = self._search_regex( + r"so\.addVariable\('file','(.*?)'\);", + webpage, 'file name') video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name - - html = lxml.html.fromstring(webpage) - - title = html.cssselect('.evenement8')[0].text_content() - - return {'id': video_id, 
'ext' : 'mp4', - 'url' : video_url, - 'title' : title - } + + title = self._html_search_regex( + r'class="evenement8">(.*?)', webpage, 'title') + + return { + 'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': title, + }