[vice] update tests and add support for ooyala embeds in article pages
author: Remita Amine <remitamine@gmail.com>
Fri, 5 May 2017 15:12:40 +0000 (16:12 +0100)
committer: Remita Amine <remitamine@gmail.com>
Fri, 5 May 2017 15:13:12 +0000 (16:13 +0100)
youtube_dl/extractor/vice.py

index b2e95734b6fe06cc602c3a8fd3f16ea82d2d05bc..54e207b391360b94f879b5de73e2c1dd31df7126 100644 (file)
@@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE):
             resource = self._get_mvpd_resource(
                 'VICELAND', title, video_id,
                 watch_hub_data.get('video-rating'))
-            query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
+            query['tvetoken'] = self._extract_mvpd_auth(
+                url, video_id, 'VICELAND', resource)
 
         # signature generation algorithm is reverse engineered from signatureGenerator in
         # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
@@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE):
 
         try:
             host = 'www.viceland' if is_locked else self._PREPLAY_HOST
-            preplay = self._download_json('https://%s.com/%s/preplay/%s' % (host, locale, video_id), video_id, query=query)
+            preplay = self._download_json(
+                'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
+                video_id, query=query)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                 error = json.loads(e.cause.read().decode())
-                raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
+                raise ExtractorError('%s said: %s' % (
+                    self.IE_NAME, error['details']), expected=True)
             raise
 
         video_data = preplay['video']
@@ -88,16 +92,17 @@ class ViceBaseIE(AdobePassIE):
 
 
 class ViceIE(ViceBaseIE):
-    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P<locale>[^/]+)/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
+    IE_NAME = 'vice'
+    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
 
     _TESTS = [{
-        'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
-        'md5': 'e9d77741f9e42ba583e683cd170660f7',
+        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
+        'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
         'info_dict': {
-            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+            'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
             'ext': 'flv',
-            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
-            'duration': 725.983,
+            'title': 'Monkey Labs of Holland',
+            'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
         },
         'add_ie': ['Ooyala'],
     }, {
@@ -136,22 +141,13 @@ class ViceIE(ViceBaseIE):
         },
         'add_ie': ['UplynkPreplay'],
     }, {
-        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
-        'only_matching': True,
-    }, {
-        'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
+        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
         'only_matching': True,
     }]
     _PREPLAY_HOST = 'video.vice'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        locale = mobj.group('locale')
-        video_id = self._match_id(url)
+        locale, video_id = re.match(self._VALID_URL, url).groups()
         webpage, urlh = self._download_webpage_handle(url, video_id)
         embed_code = self._search_regex(
             r'embedCode=([^&\'"]+)', webpage,
@@ -166,6 +162,7 @@ class ViceIE(ViceBaseIE):
 
 
 class ViceShowIE(InfoExtractor):
+    IE_NAME = 'vice:show'
     _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
 
     _TEST = {
@@ -192,12 +189,14 @@ class ViceShowIE(InfoExtractor):
             r'<title>(.+?)</title>', webpage, 'title', default=None)
         if title:
             title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
-        description = self._html_search_meta('description', webpage, 'description')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
 
         return self.playlist_result(entries, show_id, title, description)
 
 
 class ViceArticleIE(InfoExtractor):
+    IE_NAME = 'vice:article'
     _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
 
     _TESTS = [{
@@ -216,8 +215,9 @@ class ViceArticleIE(InfoExtractor):
             # AES-encrypted m3u8
             'skip_download': True,
         },
+        'add_ie': ['UplynkPreplay'],
     }, {
-        'url': 'http://www.vice.com/video/how-to-hack-a-car',
+        'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
         'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
         'info_dict': {
             'id': '3jstaBeXgAs',
@@ -229,6 +229,12 @@ class ViceArticleIE(InfoExtractor):
             'upload_date': '20140529',
         },
         'add_ie': ['Youtube'],
+    }, {
+        'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -240,22 +246,29 @@ class ViceArticleIE(InfoExtractor):
             r'window\.__PREFETCH_DATA\s*=\s*({.*});',
             webpage, 'prefetch data'), display_id)
         body = prefetch_data['body']
-        youtube_url = self._html_search_regex(
-            r'<iframe[^>]+src="(.*youtube\.com/.*)"', body, 'YouTube URL', default=None)
-        if youtube_url:
+
+        def _url_res(video_url, ie_key):
             return {
                 '_type': 'url_transparent',
-                'url': youtube_url,
+                'url': video_url,
                 'display_id': display_id,
-                'ie_key': 'Youtube',
+                'ie_key': ie_key,
             }
 
+        embed_code = self._search_regex(
+            r'embedCode=([^&\'"]+)', body,
+            'ooyala embed code', default=None)
+        if embed_code:
+            return _url_res('ooyala:%s' % embed_code, 'Ooyala')
+
+        youtube_url = self._html_search_regex(
+            r'<iframe[^>]+src="(.*youtube\.com/.*)"',
+            body, 'YouTube URL', default=None)
+        if youtube_url:
+            return _url_res(youtube_url, 'Youtube')
+
         video_url = self._html_search_regex(
-            r'data-video-url="([^"]+)"', prefetch_data['embed_code'], 'video URL')
+            r'data-video-url="([^"]+)"',
+            prefetch_data['embed_code'], 'video URL')
 
-        return {
-            '_type': 'url_transparent',
-            'url': video_url,
-            'display_id': display_id,
-            'ie_key': ViceIE.ie_key(),
-        }
+        return _url_res(video_url, ViceIE.ie_key())