[ooyala] check manifest ext with determine_ext and update tests for related extractors
[youtube-dl] / youtube_dl / extractor / espn.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import remove_end
5
6
class ESPNIE(InfoExtractor):
    """Extractor for pages served from espn.go.com.

    The page is scraped for the numeric id of its embedded clip; the media
    extraction itself is delegated to the ``OoyalaExternal`` extractor by
    returning a ``url_transparent`` result.
    """

    # Matches any http(s) URL under espn.go.com and captures a path segment
    # (anything without a slash) as ``id``.
    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
        'md5': '60e5d097a523e767d06479335d1bdc58',
        'info_dict': {
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
            'ext': 'mp4',
            'title': '30 for 30 Shorts: Judging Jewell',
            'description': None,
        },
        'add_ie': ['OoyalaExternal'],
    }, {
        # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
        'url': 'http://espn.go.com/video/clip?id=2743663',
        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
        'info_dict': {
            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
            'ext': 'mp4',
            'title': 'Must-See Moments: Best of the MLS season',
        },
        'add_ie': ['OoyalaExternal'],
    }, {
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/recap?gameId=400793786',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result handing *url* to OoyalaExternal.

        Two pages are downloaded: the page at *url*, from which the numeric
        clip id, the CMS name and the title are read, and the iframe player
        page for that clip, from which the ``pcode`` is read. The resulting
        ``ooyalaexternal:<cms>:<id>:<pcode>`` URL is what gets delegated.
        """
        # The id taken from the URL is only used while fetching the page; the
        # real clip id comes from the play button markup below.
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        clip_id = self._search_regex(
            r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
            page, 'video id', group='id')

        # Pages carrying data-source="intl" use the 'intl' CMS; everything
        # else falls back to 'espn'.
        cms = 'intl' if 'data-source="intl"' in page else 'espn'

        player_page = self._download_webpage(
            'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (clip_id, cms),
            clip_id)

        pcode = self._search_regex(
            r'["\']pcode=([^"\']+)["\']', player_page, 'pcode')

        # Title as reported by _og_search_title, minus the site suffix.
        og_title = self._og_search_title(page)
        title = remove_end(og_title, '- ESPN Video').strip()

        return {
            '_type': 'url_transparent',
            'url': 'ooyalaexternal:%s:%s:%s' % (cms, clip_id, pcode),
            'ie_key': 'OoyalaExternal',
            'title': title,
        }