[cspan] Fix extraction (fixes #8032)

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Mon, 28 Dec 2015 12:48:10 +0000 (13:48 +0100)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Mon, 28 Dec 2015 12:50:29 +0000 (13:50 +0100)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 28 Dec 2015 12:48:10 +0000 (13:48 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 28 Dec 2015 12:50:29 +0000 (13:50 +0100)
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py

index 154bddb430c89b7691657d709586129711295032..b3ee670188e2e4b7fb1e50d173ee091e74a05654 100644 (file)
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -60,11 +60,13 @@ class CSpanIE(InfoExtractor):
          video_id = self._match_id(url)
          video_type = None
          webpage = self._download_webpage(url, video_id)
-        matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
-        if matches:
+        # We first look for clipid, because clipprog always appears before clipid in the page
+        patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
+        results = list(filter(None, (re.search(p, webpage) for p in patterns)))
+        if results:
+            matches = results[0]
              video_type, video_id = matches.groups()
-            if video_type == 'prog':
-                video_type = 'program'
+            video_type = 'clip' if video_type == 'id' else 'program'
          else:
              senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
              if senate_isvp_url:
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Mon, 28 Dec 2015 12:48:10 +0000 (13:48 +0100)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Mon, 28 Dec 2015 12:50:29 +0000 (13:50 +0100)