[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / nfl.py
index 4832b3ce4b765d332ea8e827205547bd062c400c..460deb162df7994caa389b1f37c4174cec3fbf78 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_parse,
     int_or_none,
     remove_end,
 )
@@ -14,38 +16,118 @@ from ..utils import (
 
 class NFLIE(InfoExtractor):
     IE_NAME = 'nfl.com'
-    _VALID_URL = r'''(?x)https?://
-        (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
-        (?:.+?/)*
-        (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
-    _TESTS = [
-        {
-            'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
-            'md5': '394ef771ddcd1354f665b471d78ec4c6',
-            'info_dict': {
-                'id': '0ap3000000398478',
-                'ext': 'mp4',
-                'title': 'Week 3: Redskins vs. Eagles highlights',
-                'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
-                'upload_date': '20140921',
-                'timestamp': 1411337580,
-                'thumbnail': 're:^https?://.*\.jpg$',
-            }
-        },
-        {
-            'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
-            'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
-            'info_dict': {
-                'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
-                'ext': 'mp4',
-                'title': 'LIVE: Post Game vs. Browns',
-                'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
-                'upload_date': '20131229',
-                'timestamp': 1388354455,
-                'thumbnail': 're:^https?://.*\.jpg$',
-            }
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?P<host>
+                            (?:www\.)?
+                            (?:
+                                (?:
+                                    nfl|
+                                    buffalobills|
+                                    miamidolphins|
+                                    patriots|
+                                    newyorkjets|
+                                    baltimoreravens|
+                                    bengals|
+                                    clevelandbrowns|
+                                    steelers|
+                                    houstontexans|
+                                    colts|
+                                    jaguars|
+                                    titansonline|
+                                    denverbroncos|
+                                    kcchiefs|
+                                    raiders|
+                                    chargers|
+                                    dallascowboys|
+                                    giants|
+                                    philadelphiaeagles|
+                                    redskins|
+                                    chicagobears|
+                                    detroitlions|
+                                    packers|
+                                    vikings|
+                                    atlantafalcons|
+                                    panthers|
+                                    neworleanssaints|
+                                    buccaneers|
+                                    azcardinals|
+                                    stlouisrams|
+                                    49ers|
+                                    seahawks
+                                )\.com|
+                                .+?\.clubs\.nfl\.com
+                            )
+                        )/
+                        (?:.+?/)*
+                        (?P<id>[^/#?&]+)
+                    '''
+    _TESTS = [{
+        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
+        'md5': '394ef771ddcd1354f665b471d78ec4c6',
+        'info_dict': {
+            'id': '0ap3000000398478',
+            'ext': 'mp4',
+            'title': 'Week 3: Redskins vs. Eagles highlights',
+            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
+            'upload_date': '20140921',
+            'timestamp': 1411337580,
+            'thumbnail': r're:^https?://.*\.jpg$',
         }
-    ]
+    }, {
+        'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
+        'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
+        'info_dict': {
+            'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
+            'ext': 'mp4',
+            'title': 'LIVE: Post Game vs. Browns',
+            'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
+            'upload_date': '20131229',
+            'timestamp': 1388354455,
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
+        'info_dict': {
+            'id': '0ap3000000467607',
+            'ext': 'mp4',
+            'title': 'Frustrations flare on the field',
+            'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
+            'timestamp': 1422850320,
+            'upload_date': '20150202',
+        },
+    }, {
+        'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
+        'md5': '4c319e2f625ffd0b481b4382c6fc124c',
+        'info_dict': {
+            'id': 'n-238346',
+            'ext': 'mp4',
+            'title': '10 Days at Gillette',
+            'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
+            'timestamp': 1442618809,
+            'upload_date': '20150918',
+        },
+    }, {
+        # lowercase data-contentid
+        'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7',
+        'info_dict': {
+            'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2',
+            'ext': 'mp4',
+            'title': 'Tomlin looks ahead to Ravens on a short week',
+            'description': 'md5:32f3f7b139f43913181d5cbb24ecad75',
+            'timestamp': 1443459651,
+            'upload_date': '20150928',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
+        'only_matching': True,
+    }]
 
     @staticmethod
     def prepend_host(host, url):
@@ -78,9 +160,14 @@ class NFLIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         config_url = NFLIE.prepend_host(host, self._search_regex(
-            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL'))
-        config = self._download_json(config_url, video_id,
-                                     note='Downloading player config')
+            r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
+            webpage, 'config URL', default='static/content/static/config/video/config.json',
+            group='config'))
+        # For articles, the id in the url is not the video id
+        video_id = self._search_regex(
+            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
+            webpage, 'video id', default=video_id, group='id')
+        config = self._download_json(config_url, video_id, 'Downloading player config')
         url_template = NFLIE.prepend_host(
             host, '{contentURLTemplate:}'.format(**config))
         video_data = self._download_json(
@@ -90,7 +177,7 @@ class NFLIE(InfoExtractor):
         cdn_data = video_data.get('cdnData', {})
         streams = cdn_data.get('bitrateInfo', [])
         if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
-            parts = compat_urllib_parse.urlparse(cdn_data.get('uri'))
+            parts = compat_urllib_parse_urlparse(cdn_data.get('uri'))
             protocol, host = parts.scheme, parts.netloc
             for stream in streams:
                 formats.append(