[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / ign.py
index 6160f03d134ccbf38baae95c1011f4917b42052b..a96ea801019c808e6a8fe1f8f6590b0e018feeef 100644 (file)
@@ -89,6 +89,11 @@ class IGNIE(InfoExtractor):
             'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
             'only_matching': True,
         },
+        {
+            # videoId pattern
+            'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
+            'only_matching': True,
+        },
     ]
 
     def _find_video_id(self, webpage):
@@ -98,6 +103,8 @@ class IGNIE(InfoExtractor):
             r'data-video-id="(.+?)"',
             r'<object id="vid_(.+?)"',
             r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+            r'videoId&quot;\s*:\s*&quot;(.+?)&quot;',
+            r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
         ]
         return self._search_regex(res_id, webpage, 'video id', default=None)
 
@@ -109,7 +116,7 @@ class IGNIE(InfoExtractor):
         if page_type != 'video':
             multiple_urls = re.findall(
                 r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
-                 webpage)
+                webpage)
             if multiple_urls:
                 entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
                 return {
@@ -120,23 +127,24 @@ class IGNIE(InfoExtractor):
 
         video_id = self._find_video_id(webpage)
         if not video_id:
-            return self.url_result(self._search_regex(self._EMBED_RE, webpage, 'embed url'))
+            return self.url_result(self._search_regex(
+                self._EMBED_RE, webpage, 'embed url'))
         return self._get_video_info(video_id)
 
     def _get_video_info(self, video_id):
-        api_data = self._download_json(self._API_URL_TEMPLATE % video_id, video_id)
+        api_data = self._download_json(
+            self._API_URL_TEMPLATE % video_id, video_id)
 
         formats = []
         m3u8_url = api_data['refs'].get('m3uUrl')
         if m3u8_url:
-            m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
-            if m3u8_formats:
-                formats.extend(m3u8_formats)
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
         f4m_url = api_data['refs'].get('f4mUrl')
         if f4m_url:
-            f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
-            if f4m_formats:
-                formats.extend(f4m_formats)
+            formats.extend(self._extract_f4m_formats(
+                f4m_url, video_id, f4m_id='hds', fatal=False))
         for asset in api_data['assets']:
             formats.append({
                 'url': asset['url'],
@@ -195,7 +203,7 @@ class PCMagIE(IGNIE):
     _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
     IE_NAME = 'pcmag'
 
-    _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
+    _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
 
     _TESTS = [{
         'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
@@ -209,7 +217,7 @@ class PCMagIE(IGNIE):
             'upload_date': '20150106',
             'uploader_id': 'cozzipix@gmail.com',
         }
-    },{
+    }, {
         'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
         'md5': '94130c1ca07ba0adb6088350681f16c1',
         'info_dict': {