[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / washingtonpost.py
index 71349d4875f2ec2efee796419146fb27205f29d3..625d0a1cc14a52604f46e264a0f93342056fd9df 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
 class WashingtonPostIE(InfoExtractor):
     IE_NAME = 'washingtonpost'
     _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
     _TEST = {
         'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
         'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
@@ -27,6 +28,11 @@ class WashingtonPostIE(InfoExtractor):
         },
     }
 
+    @classmethod
+    def _extract_urls(cls, webpage):  # collect embed-player URLs that appear as <iframe> src values in webpage
+        return re.findall(  # the pattern has one capture group, so the result is a list of matched URL strings
+            r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)  # closing quote is not matched; capture ends where _EMBED_URL ends
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
@@ -44,7 +50,7 @@ class WashingtonPostIE(InfoExtractor):
             video_type = s.get('type')
             if video_type == 'smil':
                 continue
-            elif video_type in ('ts', 'hls'):
+            elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
                 m3u8_formats = self._extract_m3u8_formats(
                     s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
                 for m3u8_format in m3u8_formats:
@@ -75,11 +81,7 @@ class WashingtonPostIE(InfoExtractor):
                     'filesize': int_or_none(s.get('fileSize')),
                     'url': s_url,
                     'ext': 'mp4',
-                    'protocol': {
-                        'mp4': 'http',
-                        'ts': 'm3u8_native',
-                        'hls': 'm3u8_native',
-                    }.get(s.get('type')),
+                    'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
                 })
         source_media_url = video_data.get('sourceMediaURL')
         if source_media_url: