Improve some _VALID_URLs

[youtube-dl] / youtube_dl / extractor / ustream.py
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py

index 73b05ecab82a10a6c80360b0b980f285dfbb9c45..a3dc9d33ec6464b524b8ac974e04e63c06c02458 100644 (file)
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -14,7 +14,7 @@ from ..utils import (
  
  
  class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
      IE_NAME = 'ustream'
      _TESTS = [{
          'url': 'http://www.ustream.tv/recorded/20274954',
@@ -41,13 +41,19 @@ class UstreamIE(InfoExtractor):
              'uploader': 'sportscanadatv',
          },
          'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+    }, {
+        'url': 'http://www.ustream.tv/embed/10299409',
+        'info_dict': {
+            'id': '10299409',
+        },
+        'playlist_count': 3,
      }]
  
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url)
          video_id = m.group('id')
  
-        # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
+        # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
          if m.group('type') == 'embed/recorded':
              video_id = m.group('id')
              desktop_url = 'http://www.ustream.tv/recorded/' + video_id
@@ -55,10 +61,12 @@ class UstreamIE(InfoExtractor):
          if m.group('type') == 'embed':
              video_id = m.group('id')
              webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(
-                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
-            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
-            return self.url_result(desktop_url, 'Ustream')
+            content_video_ids = self._parse_json(self._search_regex(
+                r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+                'content video IDs'), video_id)
+            return self.playlist_result(
+                map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
+                video_id)
  
          params = self._download_json(
              'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
@@ -109,7 +117,7 @@ class UstreamIE(InfoExtractor):
  
  
  class UstreamChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)'
      IE_NAME = 'ustream:channel'
      _TEST = {
          'url': 'http://www.ustream.tv/channel/channeljapan',