X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fctvnews.py;h=03f8cefb77beb1185e14c2f435616d8f25f70f83;hb=HEAD;hp=e14b30085a397a43c9b0c0999b584bdd084bf45c;hpb=bf4fa24414d2f4f4418b17ed379eb60df5726c4f;p=youtube-dl diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dl/extractor/ctvnews.py index e14b30085..03f8cefb7 100644 --- a/youtube_dl/extractor/ctvnews.py +++ b/youtube_dl/extractor/ctvnews.py @@ -4,16 +4,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import orderedSet class CTVNewsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' + _VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' _TESTS = [{ 'url': 'http://www.ctvnews.ca/video?clipId=901995', - 'md5': '10deb320dc0ccb8d01d34d12fc2ea672', + 'md5': '9b8624ba66351a23e0b6e1391971f9af', 'info_dict': { 'id': '901995', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Extended: \'That person cannot be me\' Johnson says', 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', 'timestamp': 1467286284, @@ -27,18 +28,21 @@ class CTVNewsIE(InfoExtractor): }, 'playlist_mincount': 19, }, { - 'url': 'http://www.ctvnews.ca/video?binId=1.810401', + 'url': 'http://www.ctvnews.ca/video?binId=1.2876780', 'info_dict': { - 'id': '1.810401', + 'id': '1.2876780', }, - 'playlist_mincount': 91, + 'playlist_mincount': 100, }, { 'url': 'http://www.ctvnews.ca/1.810401', 'only_matching': True, }, { 'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231', 'only_matching': True, + }, { + 'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241', + 'only_matching': True, }] def _real_extract(self, url): @@ -57,8 +61,8 @@ class CTVNewsIE(InfoExtractor): else: webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ 'ot': 'example.AjaxPageLayout.ot', - 
'maxItemsPerPage': 20, + 'maxItemsPerPage': 1000000, }) - entries = [ninecninemedia_url_result(clip_id) for clip_id in set( + entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] return self.playlist_result(entries, page_id)