[abcnews] Add support for embed URLs
[youtube-dl] / youtube_dl / extractor / abcnews.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import calendar
5 import re
6 import time
7
8 from .amp import AMPIE
9 from .common import InfoExtractor
10 from ..compat import compat_urlparse
11
12
class AbcNewsVideoIE(AMPIE):
    """Extractor for single abcnews.go.com videos.

    Matches both regular video pages (``<section>/video/<slug>-<id>``) and
    embed URLs (``video/embed?id=<id>``).
    """

    IE_NAME = 'abcnews:video'
    # Verbose regex: the ``display_id`` group is only present for regular
    # video pages; embed URLs carry nothing but the numeric id.
    _VALID_URL = r'''(?x)
        https?://abcnews\.go\.com/
            (?:[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
               video/embed\?id=)
       (?P<id>\d+)'''

    _TESTS = [{
        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
        'info_dict': {
            'id': '20411932',
            'ext': 'mp4',
            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abcnews.go.com/video/embed?id=46979033',
        'md5': 'd19d160cfa8372fc197f406d4821dd9f',
        'info_dict': {
            'id': '46979033',
            'ext': 'flv',
            'title': 'Nikki Haley reacts to French election results',
            'description': 'md5:df1b5ad1b8ad76c4d4e01d3c67d6b38e',
            'duration': 274,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        The numeric id from the URL selects the item feed, which is handed
        to ``_extract_feed_info`` (not defined in this class; presumably
        inherited from ``AMPIE``). The returned dict then gets ``id`` and
        ``display_id`` overwritten with the values taken from the URL.
        """
        match = re.match(self._VALID_URL, url)
        # ``display_id`` is None for embed URLs, which have no slug.
        video_id, display_id = match.group('id', 'display_id')

        feed_url = 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id
        info = self._extract_feed_info(feed_url)

        info['id'] = video_id
        info['display_id'] = display_id
        return info
63
64
class AbcNewsIE(InfoExtractor):
    """Extractor for abcnews.go.com story pages (``.../<slug>/story?id=<id>``).

    The story page is scraped for the URL of its ABC News video, which is
    delegated to ``AbcNewsVideoIE``, and optionally for an embedded YouTube
    iframe, in which case both are returned as a playlist.
    """

    IE_NAME = 'abcnews'
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
        'info_dict': {
            'id': '10498713',
            'ext': 'flv',
            'display_id': 'dramatic-video-rare-death-job-america',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '39125818',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
            'upload_date': '20160515',
            'timestamp': 1463329500,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            # The embedded YouTube video is blocked due to copyright issues
            'playlist_items': '1',
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]

    @staticmethod
    def _parse_story_timestamp(date_str):
        """Convert the page's timestamp text into a UNIX timestamp.

        Two layouts are recognised: ``'%b. %d, %Y'`` and
        ``'%b %d, %Y, %I:%M %p'``. A trailing ``' ET'`` marker is removed
        before parsing and the result is shifted by a fixed five hours.

        Returns the timestamp as an int, or None when ``date_str`` is
        empty/None or matches neither layout.
        """
        if not date_str:
            return None

        tz_offset = 0
        if date_str.endswith(' ET'):  # Eastern Time
            # NOTE(review): a fixed UTC-5 ignores daylight saving time
            # (EDT is UTC-4). Left unchanged because the timestamps pinned
            # in _TESTS may rely on it - verify before altering.
            tz_offset = -5
            date_str = date_str[:-3]
        date_str = date_str.strip()

        for date_format in ('%b. %d, %Y', '%b %d, %Y, %I:%M %p'):
            try:
                # timegm stays inside the try so that a ValueError raised
                # by either call just moves on to the next layout.
                utc_seconds = calendar.timegm(
                    time.strptime(date_str, date_format))
            except ValueError:
                continue
            # First layout that parses wins; no need to try the rest.
            return utc_seconds - tz_offset * 3600
        return None

    def _real_extract(self, url):
        """Extract the video (and optional YouTube embed) of a story page.

        Returns a ``url_transparent`` entry pointing at ``AbcNewsVideoIE``
        that carries the story's id, display id and timestamp, or, when a
        YouTube embed iframe is found on the page, a playlist made of that
        entry followed by the YouTube URL result.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
        # The scraped URL may be relative; resolve it against the page URL.
        full_video_url = compat_urlparse.urljoin(url, video_url)

        # Optional: default=None means a missing iframe is not an error.
        youtube_url = self._html_search_regex(
            r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
            webpage, 'YouTube URL', default=None)

        date_str = self._html_search_regex(
            r'<span[^>]+class="timestamp">([^<]+)</span>',
            webpage, 'timestamp', fatal=False)

        entry = {
            '_type': 'url_transparent',
            'ie_key': AbcNewsVideoIE.ie_key(),
            'url': full_video_url,
            'id': video_id,
            'display_id': display_id,
            'timestamp': self._parse_story_timestamp(date_str),
        }

        if youtube_url:
            entries = [entry, self.url_result(youtube_url, 'Youtube')]
            return self.playlist_result(entries)

        return entry