[svtplay] Fix id extraction (closes #26576)

[youtube-dl] / youtube_dl / extractor / washingtonpost.py
diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py

index 71349d4875f2ec2efee796419146fb27205f29d3..625d0a1cc14a52604f46e264a0f93342056fd9df 100644 (file)
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@@ -13,6 +13,7 @@ from ..utils import (
  class WashingtonPostIE(InfoExtractor):
      IE_NAME = 'washingtonpost'
      _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
      _TEST = {
          'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
          'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
@@ -27,6 +28,11 @@ class WashingtonPostIE(InfoExtractor):
          },
      }
  
+    @classmethod
+    def _extract_urls(cls, webpage):
+        return re.findall(
+            r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)
+
      def _real_extract(self, url):
          video_id = self._match_id(url)
          video_data = self._download_json(
@@ -44,7 +50,7 @@ class WashingtonPostIE(InfoExtractor):
              video_type = s.get('type')
              if video_type == 'smil':
                  continue
-            elif video_type in ('ts', 'hls'):
+            elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
                  m3u8_formats = self._extract_m3u8_formats(
                      s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
                  for m3u8_format in m3u8_formats:
@@ -75,11 +81,7 @@ class WashingtonPostIE(InfoExtractor):
                      'filesize': int_or_none(s.get('fileSize')),
                      'url': s_url,
                      'ext': 'mp4',
-                    'protocol': {
-                        'mp4': 'http',
-                        'ts': 'm3u8_native',
-                        'hls': 'm3u8_native',
-                    }.get(s.get('type')),
+                    'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
                  })
          source_media_url = video_data.get('sourceMediaURL')
          if source_media_url: