class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
_TEST = {
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
},
}
+ @classmethod  # invoked on the class: only cls._EMBED_URL is read, no instance state
+ def _extract_urls(cls, webpage):  # find Washington Post embed URLs in <iframe ... src=...> tags; `webpage` is presumably the page HTML as a string - confirm against callers
+ return re.findall(  # the pattern has exactly one capture group, so findall yields a list of the captured URL strings (empty list when nothing matches)
+ r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)  # captures the part of the src value that matches _EMBED_URL, right after the opening quote
+
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
video_type = s.get('type')
if video_type == 'smil':
continue
- elif video_type in ('ts', 'hls'):
+ elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
m3u8_formats = self._extract_m3u8_formats(
s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
for m3u8_format in m3u8_formats:
'filesize': int_or_none(s.get('fileSize')),
'url': s_url,
'ext': 'mp4',
- 'protocol': {
- 'mp4': 'http',
- 'ts': 'm3u8_native',
- 'hls': 'm3u8_native',
- }.get(s.get('type')),
+ 'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
})
source_media_url = video_data.get('sourceMediaURL')
if source_media_url: