2 from __future__ import unicode_literals
6 from .common import InfoExtractor
12 class TheInterceptIE(InfoExtractor):
13 _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>.+?)/'
15 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
17 'id': 'thisisacoup-episode-four-surrender-or-die',
19 'title': '#ThisIsACoup – Episode Four: Surrender or Die',
20 'upload_date': '20151218',
21 'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
25 def _real_extract(self, url):
26 display_id = self._match_id(url)
27 webpage = self._download_webpage(url, display_id)
29 mobj = re.search(r'initialStoreTree =(?P<json_data>.+})', webpage)
31 raise ExtractorError('Unable to extract initialStoreTree')
32 json_data = self._parse_json(mobj.group('json_data'), display_id)
35 for post in json_data['resources']['posts'].values():
36 if post['slug'] == display_id:
40 raise ExtractorError('Unable to find info for %s'%display_id)
43 description = info['excerpt']
44 upload_date = info['date'][:10].replace('-', '')
45 video_id = info['fov_videoid']
46 creator = ','.join([a['display_name'] for a in info['authors']])
47 thumbnail = self._og_search_property('image', webpage)
48 content_id = thumbnail.split('/')[-1].split('.')[0]
49 content_url = 'https://content.jwplatform.com/jw6/{content_id}.xml'.format(content_id=content_id)
50 content = self._download_xml(content_url, video_id)
53 for source in content.findall('.//{http://rss.jwpcdn.com/}source'):
54 if source.attrib['file'].endswith('.m3u8'):
55 formats.extend(self._extract_m3u8_formats(
56 source.attrib['file'], video_id, 'mp4', preference=1, m3u8_id='hls'))
60 'description': description,
61 'display_id': display_id,
65 'thumbnail': thumbnail,
67 'upload_date': upload_date,