[nfl] Add new extractor. (Closes #3815)

author Naglis Jonaitis <njonaitis@gmail.com>
Mon, 22 Sep 2014 21:28:19 +0000 (00:28 +0300)
committer Naglis Jonaitis <njonaitis@gmail.com>
Mon, 22 Sep 2014 21:28:19 +0000 (00:28 +0300)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 244d222970e7cbb0178695babd85bd7132d972fd..1f1fc0eb269d5c34821692193da1c93849ff8276 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -240,6 +240,7 @@ from .ndtv import NDTVIE
  from .newgrounds import NewgroundsIE
  from .newstube import NewstubeIE
  from .nfb import NFBIE
+from .nfl import NFLIE
  from .nhl import NHLIE, NHLVideocenterIE
  from .niconico import NiconicoIE
  from .ninegag import NineGagIE
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py

new file mode 100644 (file)

index 0000000..f53596f
--- /dev/null
+++ b/youtube_dl/extractor/nfl.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    remove_end,
+)
+
+
+class NFLIE(InfoExtractor):
+    """Information extractor for videos hosted on nfl.com.
+
+    The video id is taken from the URL. The player configuration is
+    downloaded to learn the available CDNs, and one format entry is built
+    for every combination of a usable CDN and a bitrate listed in the
+    video's metadata.
+    """
+
+    IE_NAME = 'nfl.com'
+    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
+    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
+    _TEST = {
+        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
+        'info_dict': {
+            'id': '0ap3000000398478',
+            'ext': 'mp4',
+            'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights',
+            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
+            'upload_date': '20140921',
+            'timestamp': 1411337580,
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        # NOTE(review): downloader options are expected under 'params' by
+        # the test harness (not visible here) - confirm.
+        'params': {
+            'skip_download': True,  # md5 sum fluctuates
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the nfl.com video addressed by ``url``.
+
+        Raises ExtractorError when the player config lists no CDNs.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        config = self._download_json(self._PLAYER_CONFIG_URL, video_id,
+                                     note='Downloading player config')
+        # The config value is presumably a path template with an ``{id}``
+        # placeholder; it is filled in by the second format() call below.
+        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
+        video_data = self._download_json(url_template.format(id=video_id), video_id)
+
+        cdns = config.get('cdns')
+        if not cdns:
+            raise ExtractorError('Failed to get CDN data', expected=True)
+
+        formats = []
+        # ``or`` fallbacks also cover keys that are present but null.
+        streams = (video_data.get('cdnData') or {}).get('bitrateInfo') or []
+        for name, cdn in cdns.items():
+            # LimeLight streams don't seem to work
+            if cdn.get('name') == 'LIMELIGHT':
+                continue
+
+            protocol = cdn.get('protocol')
+            host = remove_end(cdn.get('host') or '', '/')
+            if not (protocol and host):
+                continue
+
+            path_prefix = cdn.get('pathprefix') or ''
+            if path_prefix and not path_prefix.endswith('/'):
+                path_prefix = '%s/' % path_prefix
+
+            # Everything up to the stream path is the same for this CDN.
+            base_url = '%s://%s/%s' % (protocol, host, path_prefix)
+
+            # Rank rtmp lowest, then CDNs whose name contains "prog";
+            # all remaining CDNs keep the neutral preference.
+            if protocol == 'rtmp':
+                preference = -2
+            elif 'prog' in name.lower():
+                preference = -1
+            else:
+                preference = 0
+
+            for stream in streams:
+                path = stream.get('path')
+                if not path:
+                    continue
+
+                formats.append({
+                    'url': '%s%s' % (base_url, path),
+                    # presumably bit/s scaled down to kbit/s - confirm
+                    'vbr': int_or_none(stream.get('rate', 0), 1000),
+                    'preference': preference,
+                    'format_note': name,
+                })
+
+        self._sort_formats(formats)
+
+        # Take the first non-empty image path, trying the keys from 'xl'
+        # down to 'xs'.
+        image_paths = video_data.get('imagePaths') or {}
+        thumbnail = None
+        for q in ('xl', 'l', 'm', 's', 'xs'):
+            thumbnail = image_paths.get(q)
+            if thumbnail:
+                break
+
+        return {
+            'id': video_id,
+            'title': video_data.get('storyHeadline'),
+            'formats': formats,
+            'description': video_data.get('caption'),
+            'duration': video_data.get('duration'),
+            'thumbnail': thumbnail,
+            # 'posted' is presumably in milliseconds - confirm
+            'timestamp': int_or_none(video_data.get('posted'), 1000),
+        }
author	Naglis Jonaitis <njonaitis@gmail.com>
	Mon, 22 Sep 2014 21:28:19 +0000 (00:28 +0300)
committer	Naglis Jonaitis <njonaitis@gmail.com>
	Mon, 22 Sep 2014 21:28:19 +0000 (00:28 +0300)
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/nfl.py	[new file with mode: 0644]	patch \| blob