[cspan] Fix extraction (fixes #8032)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 28 Dec 2015 12:48:10 +0000 (13:48 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 28 Dec 2015 12:50:29 +0000 (13:50 +0100)
youtube_dl/extractor/cspan.py

index 154bddb430c89b7691657d709586129711295032..b3ee670188e2e4b7fb1e50d173ee091e74a05654 100644 (file)
@@ -60,11 +60,13 @@ class CSpanIE(InfoExtractor):
         video_id = self._match_id(url)
         video_type = None
         webpage = self._download_webpage(url, video_id)
-        matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
-        if matches:
+        # We first look for clipid, because clipprog always appears before it in the page
+        patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
+        results = list(filter(None, (re.search(p, webpage) for p in patterns)))
+        if results:
+            matches = results[0]
             video_type, video_id = matches.groups()
-            if video_type == 'prog':
-                video_type = 'program'
+            video_type = 'clip' if video_type == 'id' else 'program'
         else:
             senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
             if senate_isvp_url: