Merge pull request #12909 from remitamine/raw-sub
[youtube-dl] / youtube_dl / extractor / espn.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import (
6     determine_ext,
7     int_or_none,
8     unified_timestamp,
9 )
10
11
class ESPNIE(InfoExtractor):
    """Extractor for single ESPN video clips addressed by a numeric id.

    The id is taken from either a ``?id=<digits>`` query parameter or a
    ``/_/id/<digits>`` path segment of ``video/clip`` and ``watch/player``
    URLs on ``espn.com`` and ``*.espn.go.com`` hosts.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:\w+\.)+)?espn\.go|
                            (?:www\.)?espn
                        )\.com/
                        (?:
                            (?:
                                video/clip|
                                watch/player
                            )
                            (?:
                                \?.*?\bid=|
                                /_/id/
                            )
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://espn.go.com/video/clip?id=10365079',
        'info_dict': {
            'id': '10365079',
            'ext': 'mp4',
            'title': '30 for 30 Shorts: Judging Jewell',
            'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
            'timestamp': 1390936111,
            'upload_date': '20140128',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
        'info_dict': {
            'id': '18910086',
            'ext': 'mp4',
            'title': 'Kyrie spins around defender for two',
            'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
            'timestamp': 1489539155,
            'upload_date': '20170315',
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
        'only_matching': True,
    }, {
        'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/watch/player?id=19141491',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/watch/player/_/id/19141491',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/video/clip?id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://www.espn.com/video/clip/_/id/17989860',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the clip referenced by ``url``.

        Fetches the clip JSON from the ``api-app.espn.com`` clips endpoint,
        uses the first entry of its ``videos`` list, and collects formats
        from every URL found in the nested ``links.source`` mapping.
        """
        video_id = self._match_id(url)

        api_response = self._download_json(
            'http://api-app.espn.com/v1/video/clips/%s' % video_id,
            video_id)
        clip = api_response['videos'][0]

        # Mandatory field: a missing headline raises before any format work.
        title = clip['headline']

        seen_urls = set()
        formats = []

        def collect_formats(media_url, format_id=None):
            # Each distinct URL is processed at most once, even when it is
            # listed under several keys of the source mapping.
            if media_url in seen_urls:
                return
            seen_urls.add(media_url)

            kind = determine_ext(media_url)
            if kind == 'm3u8':
                found = self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False)
            elif kind == 'f4m':
                found = self._extract_f4m_formats(
                    media_url, video_id, f4m_id=format_id, fatal=False)
            elif kind == 'smil':
                found = self._extract_smil_formats(
                    media_url, video_id, fatal=False)
            else:
                # Any other extension is treated as a direct media URL.
                found = [{
                    'url': media_url,
                    'format_id': format_id,
                }]
            formats.extend(found)

        def walk_sources(node, id_prefix=None):
            # String values are media URLs labelled with the id accumulated
            # from the *enclosing* keys; dict values are descended into with
            # their own key appended to that id. Other value types are
            # ignored.
            for key, value in node.items():
                if isinstance(value, compat_str):
                    collect_formats(value, id_prefix)
                    continue
                if not isinstance(value, dict):
                    continue
                if id_prefix:
                    nested_prefix = '%s-%s' % (id_prefix, key)
                else:
                    nested_prefix = key
                walk_sources(value, nested_prefix)

        walk_sources(clip['links']['source'])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            # Prefer the caption; fall back to the description when the
            # caption is missing or empty.
            'description': clip.get('caption') or clip.get('description'),
            'thumbnail': clip.get('thumbnail'),
            'timestamp': unified_timestamp(clip.get('originalPublishDate')),
            'duration': int_or_none(clip.get('duration')),
            'formats': formats,
        }
142
143
class ESPNArticleIE(InfoExtractor):
    """Extractor for ESPN pages that embed a clip handled by ``ESPNIE``.

    The URL pattern matches any path on ``espn.go.com`` / ``espn.com``;
    ``suitable`` therefore rejects URLs that ``ESPNIE`` already accepts.
    """

    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/recap?gameId=400793786',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
        'only_matching': True,
    }, {
        'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs ``ESPNIE`` accepts, else defer to the base check."""
        if ESPNIE.suitable(url):
            return False
        return super(ESPNArticleIE, cls).suitable(url)

    def _real_extract(self, url):
        """Find the embedded clip id in the page and delegate to ``ESPNIE``."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # The clip id is the data-id attribute of the tag whose class
        # attribute contains "video-play-button".
        clip_id = self._search_regex(
            r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
            webpage, 'video id', group='id')

        clip_url = 'http://espn.go.com/video/clip?id=%s' % clip_id
        return self.url_result(clip_url, ESPNIE.ie_key())