2 from __future__ import unicode_literals
4 from .common import InfoExtractor
10 get_element_by_attribute,
15 class NobelPrizeIE(InfoExtractor):
16 _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
18 'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
19 'md5': '04c81e5714bb36cc4e2232fee1d8157f',
23 'title': 'Announcement of the 2016 Nobel Prize in Physics',
24 'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
28 def _real_extract(self, url):
29 video_id = self._match_id(url)
30 webpage = self._download_webpage(url, video_id)
31 media = self._parse_json(self._search_regex(
32 r'(?s)var\s*config\s*=\s*({.+?});', webpage,
33 'config'), video_id, js_to_json)['media']
34 title = media['title']
37 for source in media.get('source', []):
38 source_src = source.get('src')
41 ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
43 formats.extend(self._extract_m3u8_formats(
44 source_src, video_id, 'mp4', 'm3u8_native',
45 m3u8_id='hls', fatal=False))
47 formats.extend(self._extract_f4m_formats(
48 update_url_query(source_src, {'hdcore': '3.7.0'}),
49 video_id, f4m_id='hds', fatal=False))
54 self._sort_formats(formats)
59 'description': get_element_by_attribute('itemprop', 'description', webpage),
60 'duration': int_or_none(media.get('duration')),