Improve some _VALID_URLs

[youtube-dl] / youtube_dl / extractor / ustream.py
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py

index a29d67e9fa6e36713e1ede4bb034724eff311828..a3dc9d33ec6464b524b8ac974e04e63c06c02458 100644 (file)
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -14,7 +14,7 @@ from ..utils import (
  
  
  class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
      IE_NAME = 'ustream'
      _TESTS = [{
          'url': 'http://www.ustream.tv/recorded/20274954',
@@ -22,8 +22,12 @@ class UstreamIE(InfoExtractor):
          'info_dict': {
              'id': '20274954',
              'ext': 'flv',
-            'uploader': 'Young Americans for Liberty',
              'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+            'description': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+            'timestamp': 1328577035,
+            'upload_date': '20120207',
+            'uploader': 'yaliberty',
+            'uploader_id': '6780869',
          },
      }, {
          # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
@@ -35,14 +39,21 @@ class UstreamIE(InfoExtractor):
              'ext': 'flv',
              'title': '-CG11- Canada Games Figure Skating',
              'uploader': 'sportscanadatv',
-        }
+        },
+        'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+    }, {
+        'url': 'http://www.ustream.tv/embed/10299409',
+        'info_dict': {
+            'id': '10299409',
+        },
+        'playlist_count': 3,
      }]
  
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url)
          video_id = m.group('id')
  
-        # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
+        # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
          if m.group('type') == 'embed/recorded':
              video_id = m.group('id')
              desktop_url = 'http://www.ustream.tv/recorded/' + video_id
@@ -50,10 +61,12 @@ class UstreamIE(InfoExtractor):
          if m.group('type') == 'embed':
              video_id = m.group('id')
              webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(
-                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
-            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
-            return self.url_result(desktop_url, 'Ustream')
+            content_video_ids = self._parse_json(self._search_regex(
+                r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+                'content video IDs'), video_id)
+            return self.playlist_result(
+                map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
+                video_id)
  
          params = self._download_json(
              'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
@@ -104,7 +117,7 @@ class UstreamIE(InfoExtractor):
  
  
  class UstreamChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)'
      IE_NAME = 'ustream:channel'
      _TEST = {
          'url': 'http://www.ustream.tv/channel/channeljapan',