[generic] Add support for multiple brightcove URLs (Fixes #2283)

author Philipp Hagemeister <phihag@phihag.de>

Mon, 3 Feb 2014 14:19:40 +0000 (15:19 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Mon, 3 Feb 2014 14:19:40 +0000 (15:19 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Mon, 3 Feb 2014 14:19:40 +0000 (15:19 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 3 Feb 2014 14:19:40 +0000 (15:19 +0100)
diff --git a/test/test_playlists.py b/test/test_playlists.py

index b3ce6f71ef3bdb17a9e700729984bbfd840d30ef..fda2e0112c6d37f0d90b3cb56097c03c12ed6f3c 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -34,6 +34,7 @@ from youtube_dl.extractor import (
      KhanAcademyIE,
      EveryonesMixtapeIE,
      RutubeChannelIE,
+    GenericIE,
  )
  
  
@@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
          self.assertEqual(result['id'], '1409')
          self.assertTrue(len(result['entries']) >= 34)
  
+    def test_multiple_brightcove_videos(self):
+        # https://github.com/rg3/youtube-dl/issues/2283
+        dl = FakeYDL()
+        ie = GenericIE(dl)
+        result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
+        self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
+        self.assertEqual(len(result['entries']), 3)
+
  
  if __name__ == '__main__':
      unittest.main()
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index 9ccf923a63fbd59b098f2e0edb15c025c5d0b602..031fe385d906dd8f4a53f8440955d325e5b0add1 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
  
      @classmethod
      def _extract_brightcove_url(cls, webpage):
-        """Try to extract the brightcove url from the wepbage, returns None
+        """Try to extract the brightcove url from the webpage, returns None
          if it can't be found
          """
+        urls = cls._extract_brightcove_urls(webpage)
+        return urls[0] if urls else None
+
+    @classmethod
+    def _extract_brightcove_urls(cls, webpage):
+        """Return a list of all Brightcove URLs from the webpage """
  
          url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
          if url_m:
-            return url_m.group(1)
+            return [url_m.group(1)]
  
-        m_brightcove = re.search(
+        matches = re.findall(
              r'''(?sx)<object
              (?:
-                [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
+                [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                  [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
              ).+?</object>''',
              webpage)
-        if m_brightcove is not None:
-            return cls._build_brighcove_url(m_brightcove.group())
-        else:
-            return None
+        return [cls._build_brighcove_url(m) for m in matches]
  
      def _real_extract(self, url):
          url, smuggled_data = unsmuggle_url(url, {})
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 082da9c77b138f35651e2de8a2d035f8fee1f517..5bcc78bf79734ddd47ee93b2215c31bdc28e59d3 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
              r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
  
          # Look for BrightCove:
-        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
-        if bc_url is not None:
+        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+        if bc_urls:
              self.to_screen('Brightcove video detected.')
-            surl = smuggle_url(bc_url, {'Referer': url})
-            return self.url_result(surl, 'Brightcove')
+            entries = [{
+                '_type': 'url',
+                'url': smuggle_url(bc_url, {'Referer': url}),
+                'ie_key': 'Brightcove'
+            } for bc_url in bc_urls]
+
+            return {
+                '_type': 'playlist',
+                'title': video_title,
+                'id': video_id,
+                'entries': entries,
+            }
  
          # Look for embedded (iframe) Vimeo player
          mobj = re.search(
author	Philipp Hagemeister <phihag@phihag.de>
	Mon, 3 Feb 2014 14:19:40 +0000 (15:19 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Mon, 3 Feb 2014 14:19:40 +0000 (15:19 +0100)
test/test_playlists.py		patch | blob | history
youtube_dl/extractor/brightcove.py		patch | blob | history
youtube_dl/extractor/generic.py		patch | blob | history