1 from __future__ import unicode_literals
3 from .common import InfoExtractor
14 class NFBIE(InfoExtractor):
16 IE_DESC = 'National Film Board of Canada'
17 _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
20 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
22 'id': 'qallunaat_why_white_people_are_funny',
24 'title': 'Qallunaat! Why White People Are Funny ',
25 'description': 'md5:6b8e32dde3abf91e58857b174916620c',
27 'creator': 'Mark Sandiford',
28 'uploader': 'Mark Sandiford',
32 'skip_download': True,
36 def _real_extract(self, url):
37 video_id = self._match_id(url)
39 config = self._download_xml(
40 'https://www.nfb.ca/film/%s/player_config' % video_id,
41 video_id, 'Downloading player config XML',
42 data=urlencode_postdata({'getConfig': 'true'}),
44 'Content-Type': 'application/x-www-form-urlencoded',
45 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
48 title, description, thumbnail, duration, uploader, author = [None] * 6
49 thumbnails, formats = [[]] * 2
52 for media in config.findall('./player/stream/media'):
53 if media.get('type') == 'posterImage':
54 quality_key = qualities(('low', 'high'))
56 for asset in media.findall('assets/asset'):
57 asset_url = xpath_text(asset, 'default/url', default=None)
60 quality = asset.get('quality')
64 'preference': quality_key(quality),
66 elif media.get('type') == 'video':
67 title = xpath_text(media, 'title', fatal=True)
68 for asset in media.findall('assets/asset'):
69 quality = asset.get('quality')
70 height = int_or_none(self._search_regex(
71 r'^(\d+)[pP]$', quality or '', 'height', default=None))
73 streamer = xpath_text(node, 'streamerURI', default=None)
76 play_path = xpath_text(node, 'url', default=None)
81 'app': streamer.split('/', 3)[3],
82 'play_path': play_path,
85 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
88 self._sort_formats(formats)
89 description = clean_html(xpath_text(media, 'description'))
90 uploader = xpath_text(media, 'author')
91 duration = int_or_none(media.get('duration'))
92 for subtitle in media.findall('./subtitles/subtitle'):
93 subtitle_url = xpath_text(subtitle, 'url', default=None)
96 lang = xpath_text(subtitle, 'lang', default='en')
97 subtitles.setdefault(lang, []).append({
99 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
105 'description': description,
106 'thumbnails': thumbnails,
107 'duration': duration,
109 'uploader': uploader,
111 'subtitles': subtitles,