[ign]: support some country versions and add an extractor for 1up.com
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 12 Jul 2013 09:39:40 +0000 (11:39 +0200)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 12 Jul 2013 09:39:40 +0000 (11:39 +0200)
1up.com uses the IGN video system, so its extractor is a subclass of IGNIE that just replaces the video id.

youtube_dl/extractor/__init__.py
youtube_dl/extractor/ign.py

index 491c5368f60068b9be582a508681937a570aab7a..c00d5a3529a5c89c4e338c778ea9500fe9b936c7 100644 (file)
@@ -27,7 +27,7 @@ from .googlesearch import GoogleSearchIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
-from .ign import IGNIE
+from .ign import IGNIE, OneUPIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
index b3a850d746e62cdfd3e50e6d3c92b57d3fc72c39..62abab65552dffdce1eec252f6860dd847d36b91 100644 (file)
@@ -6,10 +6,21 @@ from ..utils import (
     determine_ext,
 )
 
     determine_ext,
 )
 
+
 class IGNIE(InfoExtractor):
 class IGNIE(InfoExtractor):
-    _VALID_URL = r'http://www.ign.com/videos/.+/(?P<name>.+)'
+    """
+    Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
+    Some videos of it.ign.com are also supported
+    """
+
+    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
     IE_NAME = u'ign.com'
 
     IE_NAME = u'ign.com'
 
+    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
+    _DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
+                       r'id="my_show_video">.*?<p>(.*?)</p>',
+                       ]
+
     _TEST = {
         u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
         u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
     _TEST = {
         u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
         u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
@@ -20,16 +31,29 @@ class IGNIE(InfoExtractor):
         }
     }
 
         }
     }
 
+    def _find_video_id(self, webpage):
+        res_id = [r'data-video-id="(.+?)"',
+                  r'<object id="vid_(.+?)"',
+                  r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+                  ]
+        return self._search_regex(res_id, webpage, 'video id')
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        config_url = url + '.config'
-        webpage = self._download_webpage(url, name)
-        config = json.loads(self._download_webpage(config_url, name, u'Downloading video info'))
-
-        self.report_extraction(name)
-        description = self._html_search_regex(r'<span class="page-object-description">(.+?)</span>',
-                                              webpage, 'video description', flags=re.DOTALL)
+        name_or_id = mobj.group('name_or_id')
+        webpage = self._download_webpage(url, name_or_id)
+        video_id = self._find_video_id(webpage)
+        result = self._get_video_info(video_id)
+        description = self._html_search_regex(self._DESCRIPTION_RE,
+                                              webpage, 'video description',
+                                              flags=re.DOTALL)
+        result['description'] = description
+        return result
+
+    def _get_video_info(self, video_id):
+        config_url = self._CONFIG_URL_TEMPLATE % video_id
+        config = json.loads(self._download_webpage(config_url, video_id,
+                            u'Downloading video info'))
         media = config['playlist']['media']
         video_url = media['url']
 
         media = config['playlist']['media']
         video_url = media['url']
 
@@ -37,9 +61,31 @@ class IGNIE(InfoExtractor):
                 'url': video_url,
                 'ext': determine_ext(video_url),
                 'title': media['metadata']['title'],
                 'url': video_url,
                 'ext': determine_ext(video_url),
                 'title': media['metadata']['title'],
-                'description': description,
-                'thumbnail': media['poster'][0]['url'].replace('{size}', 'small'),
+                'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
                 }
                 }
-        
 
 
 
 
+class OneUPIE(IGNIE):
+    """Extractor for 1up.com, it uses the ign videos system."""
+
+    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+    IE_NAME = '1up.com'
+
+    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
+
+    _TEST = {
+        u'url': u'http://gamevideos.1up.com/video/id/34976',
+        u'file': u'34976.mp4',
+        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
+        u'info_dict': {
+            u'title': u'Sniper Elite V2 - Trailer',
+            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        id = mobj.group('name_or_id')
+        result = super(OneUPIE, self)._real_extract(url)
+        result['id'] = id
+        return result