[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / thescene.py
index 08d666eaf9300e119c84ae619ad06870c21dff9e..cd642355c9fd738bb461bff921aa395fa0304f4b 100644 (file)
@@ -2,12 +2,11 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
-from ..compat import compat_urllib_parse
-from ..utils import qualities
+from ..compat import compat_urlparse
 
 
 class TheSceneIE(InfoExtractor):
-    _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
 
     _TEST = {
         'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear',
@@ -16,33 +15,30 @@ class TheSceneIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
             'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
+            'duration': 127,
+            'series': 'Style.com Fashion Shows',
+            'season': 'Ready To Wear Spring 2013',
+            'tags': list,
+            'categories': list,
+            'upload_date': '20120913',
+            'timestamp': 1347512400,
+            'uploader': 'vogue',
         },
     }
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+
         webpage = self._download_webpage(url, display_id)
-        player_url = compat_urllib_parse.urljoin(
+
+        player_url = compat_urlparse.urljoin(
             url,
             self._html_search_regex(
                 r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url'))
 
-        self.to_screen(player_url)
-        player = self._download_webpage(player_url, player_url)
-        info = self._parse_json(self._search_regex(r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), display_id)
-
-        qualities_order = qualities(['low', 'high'])
-        formats = [{
-            'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']),
-            'url': f['src'],
-            'quality': qualities_order(f['quality']),
-        } for f in info['sources'][0]]
-        self._sort_formats(formats)
-
         return {
-            'id': info['id'],
-            'title': info['title'],
-            'formats': formats,
-            'thumbnail': info.get('poster_frame'),
+            '_type': 'url_transparent',
             'display_id': display_id,
+            'url': player_url,
+            'ie_key': 'CondeNast',
         }