[espn] Support 'intl' videos (#7858)
[youtube-dl] / youtube_dl / extractor / espn.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4
5
class ESPNIE(InfoExtractor):
    """Extractor for espn.go.com pages that carry a video play button.

    The numeric clip id is read from the page, the Twitter iframe player is
    fetched to learn the ``pcode``, and the result is handed off through
    ``url_result`` as an ``ooyalaexternal:`` URL.
    """

    # Any espn.go.com path; the last path segment (query included) is the id.
    _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
        'info_dict': {
            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
            'ext': 'mp4',
            'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
            'description': None,
        },
        'params': {
            # the media is served as m3u8, so do not download it
            'skip_download': True,
        },
    }, {
        # 'intl' video, taken from
        # http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
        'url': 'http://espn.go.com/video/clip?id=2743663',
        'info_dict': {
            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
            'ext': 'mp4',
            'title': 'int_151206_Must_See_Moments_Best_of_MLS_2015_season',
        },
        'params': {
            # the media is served as m3u8, so do not download it
            'skip_download': True,
        },
    }, {
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/recap?gameId=400793786',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve an ESPN page URL to an ``ooyalaexternal:`` URL result.

        Downloads the page at ``url`` and then the iframe player page, and
        returns whatever ``url_result`` builds for the ``OoyalaExternal``
        extractor key.
        """
        # The id matched from the URL is only used while fetching the page;
        # the real clip id is the numeric one found on the play button.
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        clip_id = self._search_regex(
            r'class="video-play-button"[^>]+data-id="(\d+)',
            webpage, 'video id')

        # Pages flagged with data-source="intl" use the 'intl' CMS;
        # everything else falls back to 'espn'.
        cms = 'intl' if 'data-source="intl"' in webpage else 'espn'

        player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (clip_id, cms)
        player_page = self._download_webpage(player_url, clip_id)

        pcode = self._search_regex(
            r'["\']pcode=([^"\']+)["\']', player_page, 'pcode')

        ooyala_url = 'ooyalaexternal:%s:%s:%s' % (cms, clip_id, pcode)
        return self.url_result(ooyala_url, 'OoyalaExternal')