Improve some _VALID_URLs
[youtube-dl] / youtube_dl / extractor / ustream.py
index a29d67e9fa6e36713e1ede4bb034724eff311828..a3dc9d33ec6464b524b8ac974e04e63c06c02458 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)'
     IE_NAME = 'ustream'
     _TESTS = [{
         'url': 'http://www.ustream.tv/recorded/20274954',
@@ -22,8 +22,12 @@ class UstreamIE(InfoExtractor):
         'info_dict': {
             'id': '20274954',
             'ext': 'flv',
-            'uploader': 'Young Americans for Liberty',
             'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+            'description': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+            'timestamp': 1328577035,
+            'upload_date': '20120207',
+            'uploader': 'yaliberty',
+            'uploader_id': '6780869',
         },
     }, {
         # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444
@@ -35,14 +39,21 @@ class UstreamIE(InfoExtractor):
             'ext': 'flv',
             'title': '-CG11- Canada Games Figure Skating',
             'uploader': 'sportscanadatv',
-        }
+        },
+        'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
+    }, {
+        'url': 'http://www.ustream.tv/embed/10299409',
+        'info_dict': {
+            'id': '10299409',
+        },
+        'playlist_count': 3,
     }]
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('id')
 
-        # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
+        # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
         if m.group('type') == 'embed/recorded':
             video_id = m.group('id')
             desktop_url = 'http://www.ustream.tv/recorded/' + video_id
@@ -50,10 +61,12 @@ class UstreamIE(InfoExtractor):
         if m.group('type') == 'embed':
             video_id = m.group('id')
             webpage = self._download_webpage(url, video_id)
-            desktop_video_id = self._html_search_regex(
-                r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
-            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
-            return self.url_result(desktop_url, 'Ustream')
+            content_video_ids = self._parse_json(self._search_regex(
+                r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
+                'content video IDs'), video_id)
+            return self.playlist_result(
+                map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
+                video_id)
 
         params = self._download_json(
             'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
@@ -104,7 +117,7 @@ class UstreamIE(InfoExtractor):
 
 
 class UstreamChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
+    _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)'
     IE_NAME = 'ustream:channel'
     _TEST = {
         'url': 'http://www.ustream.tv/channel/channeljapan',