[brightcove] Fix _extract_url (closes #12782)

[youtube-dl] / youtube_dl / extractor / yahoo.py
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py

index 91f0a0dbbda4cfbc7fb3e1ea8b8c24798c7d1546..38f82bf447128d42643afa865c41efc7db9014cd 100644 (file)
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -201,6 +201,32 @@ class YahooIE(InfoExtractor):
              },
              'skip': 'redirect to https://www.yahoo.com/music',
          },
+        {
+            # yahoo://article/
+            'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
+            'info_dict': {
+                'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+                'ext': 'mp4',
+                'title': "'True Story' Trailer",
+                'description': 'True Story',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            # ytwnews://cavideo/
+            'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+            'info_dict': {
+                'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+                'ext': 'mp4',
+                'title': '單車天使 - 中文版預',
+                'description': '中文版預',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
      ]
  
      def _real_extract(self, url):
@@ -232,7 +258,7 @@ class YahooIE(InfoExtractor):
              return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
  
          # Look for Brightcove New Studio embeds
-        bc_url = BrightcoveNewIE._extract_url(webpage)
+        bc_url = BrightcoveNewIE._extract_url(self, webpage)
          if bc_url:
              return self.url_result(bc_url, BrightcoveNewIE.ie_key())
  
@@ -269,7 +295,8 @@ class YahooIE(InfoExtractor):
                      r'"first_videoid"\s*:\s*"([^"]+)"',
                      r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
                      r'<article[^>]data-uuid=["\']([^"\']+)',
-                    r'yahoo://article/view\?.*\buuid=([^&"\']+)',
+                    r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
+                    r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
                  ]
                  video_id = self._search_regex(
                      CONTENT_ID_REGEXES, webpage, 'content ID')