[gameinformer] fix extraction (closes #8895) (closes #15363) (closes #17206)
author: Remita Amine <remitamine@gmail.com>
Wed, 10 Jul 2019 14:45:00 +0000 (15:45 +0100)
committer: Remita Amine <remitamine@gmail.com>
Wed, 10 Jul 2019 14:45:00 +0000 (15:45 +0100)
youtube_dl/extractor/gameinformer.py

index a2920a793ba45d3fef47eebba26bc3c19517b63c..f1b96c172edd9b80b974ec43aab8be1242e8f9a4 100644 (file)
@@ -1,12 +1,19 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+from .brightcove import BrightcoveNewIE
 from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_class,
+    get_element_by_id,
+)
 
 
 class GameInformerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
+    _TESTS = [{
+        # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
         'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
         'md5': '292f26da1ab4beb4c9099f1304d2b071',
         'info_dict': {
@@ -18,16 +25,25 @@ class GameInformerIE(InfoExtractor):
             'upload_date': '20150928',
             'uploader_id': '694940074001',
         },
-    }
+    }, {
+        # Brightcove id inside unique element with field--name-field-brightcove-video-id class
+        'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
+        'info_dict': {
+            'id': '6057111913001',
+            'ext': 'mp4',
+            'title': 'New Gameplay Today – Streets Of Rogue',
+            'timestamp': 1562699001,
+            'upload_date': '20190709',
+            'uploader_id': '694940074001',
+
+        },
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(
             url, display_id, headers=self.geo_verification_headers())
-        brightcove_id = self._search_regex(
-            [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
-            webpage, 'brightcove id')
-        return self.url_result(
-            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
-            brightcove_id)
+        brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage))
+        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage)
+        return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)