X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcbc.py;h=751a3a8f26c94ecb19c130503593515312bac6c7;hb=be96f9924f4b93ab4632b602a7e7b97518a1ddab;hp=ce8e3d346b56f4b3b48b789723324d06607b2ea6;hpb=986c0b0215b127713825fa1523966ac66e03157b;p=youtube-dl diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index ce8e3d346..751a3a8f2 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -17,6 +17,7 @@ from ..utils import ( xpath_element, xpath_with_ns, find_xpath_attr, + orderedSet, parse_duration, parse_iso8601, parse_age_limit, @@ -136,9 +137,15 @@ class CBCIE(InfoExtractor): entries = [ self._extract_player_init(player_init, display_id) for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + media_ids = [] + for media_id_re in ( + r']+src="[^"]+?mediaId=(\d+)"', + r']+\bid=["\']player-(\d+)', + r'guid["\']\s*:\s*["\'](\d+)'): + media_ids.extend(re.findall(media_id_re, webpage)) entries.extend([ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)]) + for media_id in orderedSet(media_ids)]) return self.playlist_result( entries, display_id, strip_or_none(title), self._og_search_description(webpage)) @@ -353,7 +360,7 @@ class CBCWatchVideoIE(CBCWatchBaseIE): class CBCWatchIE(CBCWatchBaseIE): IE_NAME = 'cbc.ca:watch' - _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P[0-9a-f-]+)' + _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P[0-9a-f-]+)' _TESTS = [{ # geo-restricted to Canada, bypassable 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', @@ -379,6 +386,9 @@ class CBCWatchIE(CBCWatchBaseIE): 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', }, 'playlist_mincount': 30, + }, { + 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42', + 'only_matching': True, }] def _real_extract(self, url):