_ Git - youtube-dl/blob - youtube_dl/extractor/espn.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..compat import compat_str
   5 from ..utils import (
   6     determine_ext,
   7     int_or_none,
   8     unified_timestamp,
   9 )
  10
  11
  12 class ESPNIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
  14     _TESTS = [{
  15         'url': 'http://espn.go.com/video/clip?id=10365079',
  16         'info_dict': {
  17             'id': '10365079',
  18             'ext': 'mp4',
  19             'title': '30 for 30 Shorts: Judging Jewell',
  20             'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
  21             'timestamp': 1390936111,
  22             'upload_date': '20140128',
  23         },
  24         'params': {
  25             'skip_download': True,
  26         },
  27     }, {
  28         # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
  29         'url': 'http://espn.go.com/video/clip?id=2743663',
  30         'info_dict': {
  31             'id': '2743663',
  32             'ext': 'mp4',
  33             'title': 'Must-See Moments: Best of the MLS season',
  34             'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
  35             'timestamp': 1449446454,
  36             'upload_date': '20151207',
  37         },
  38         'params': {
  39             'skip_download': True,
  40         },
  41         'expected_warnings': ['Unable to download f4m manifest'],
  42     }, {
  43         'url': 'http://www.espn.com/video/clip?id=10365079',
  44         'only_matching': True,
  45     }, {
  46         'url': 'http://www.espn.com/video/clip/_/id/17989860',
  47         'only_matching': True,
  48     }]
  49
  50     def _real_extract(self, url):
  51         video_id = self._match_id(url)
  52
  53         clip = self._download_json(
  54             'http://api-app.espn.com/v1/video/clips/%s' % video_id,
  55             video_id)['videos'][0]
  56
  57         title = clip['headline']
  58
  59         format_urls = set()
  60         formats = []
  61
  62         def traverse_source(source, base_source_id=None):
  63             for source_id, source in source.items():
  64                 if isinstance(source, compat_str):
  65                     extract_source(source, base_source_id)
  66                 elif isinstance(source, dict):
  67                     traverse_source(
  68                         source,
  69                         '%s-%s' % (base_source_id, source_id)
  70                         if base_source_id else source_id)
  71
  72         def extract_source(source_url, source_id=None):
  73             if source_url in format_urls:
  74                 return
  75             format_urls.add(source_url)
  76             ext = determine_ext(source_url)
  77             if ext == 'smil':
  78                 formats.extend(self._extract_smil_formats(
  79                     source_url, video_id, fatal=False))
  80             elif ext == 'f4m':
  81                 formats.extend(self._extract_f4m_formats(
  82                     source_url, video_id, f4m_id=source_id, fatal=False))
  83             elif ext == 'm3u8':
  84                 formats.extend(self._extract_m3u8_formats(
  85                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
  86                     m3u8_id=source_id, fatal=False))
  87             else:
  88                 formats.append({
  89                     'url': source_url,
  90                     'format_id': source_id,
  91                 })
  92
  93         traverse_source(clip['links']['source'])
  94         self._sort_formats(formats)
  95
  96         description = clip.get('caption') or clip.get('description')
  97         thumbnail = clip.get('thumbnail')
  98         duration = int_or_none(clip.get('duration'))
  99         timestamp = unified_timestamp(clip.get('originalPublishDate'))
 100
 101         return {
 102             'id': video_id,
 103             'title': title,
 104             'description': description,
 105             'thumbnail': thumbnail,
 106             'timestamp': timestamp,
 107             'duration': duration,
 108             'formats': formats,
 109         }
 110
 111
 112 class ESPNArticleIE(InfoExtractor):
 113     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
 114     _TESTS = [{
 115         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
 116         'only_matching': True,
 117     }, {
 118         'url': 'http://espn.go.com/nba/recap?gameId=400793786',
 119         'only_matching': True,
 120     }, {
 121         'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
 122         'only_matching': True,
 123     }, {
 124         'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
 125         'only_matching': True,
 126     }, {
 127         'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
 128         'only_matching': True,
 129     }]
 130
 131     @classmethod
 132     def suitable(cls, url):
 133         return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
 134
 135     def _real_extract(self, url):
 136         video_id = self._match_id(url)
 137
 138         webpage = self._download_webpage(url, video_id)
 139
 140         video_id = self._search_regex(
 141             r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
 142             webpage, 'video id', group='id')
 143
 144         return self.url_result(
 145             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())