Merge pull request #9395 from pmrowla/afreecatv
[youtube-dl] / youtube_dl / extractor / cbslocal.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import calendar
5 import datetime
6
7 from .anvato import AnvatoIE
8 from .sendtonews import SendtoNewsIE
9 from ..compat import compat_urlparse
10
11
class CBSLocalIE(AnvatoIE):
    """Extractor for dated article pages on the ``*.cbslocal.com`` sites.

    A page is resolved in one of two ways: if it carries a SendtoNews
    embed, extraction is delegated to that embed's URL; otherwise the
    page is handed to the Anvato extraction inherited from ``AnvatoIE``.
    In both cases the article's display id and, when it can be parsed,
    the release date shown on the page are added to the result.
    """

    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

    _TESTS = [{
        # Anvato backend
        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
        'md5': 'f0ee3081e3843f575fccef901199b212',
        'info_dict': {
            'id': '3401037',
            'ext': 'mp4',
            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
            'thumbnail': 're:^https?://.*',
            'timestamp': 1463440500,
            'upload_date': '20160516',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\KCBSTV',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\AOL',
                'Syndication\\Yahoo',
                'Syndication\\Tribune',
                'Syndication\\Curb.tv',
                'Content\\News'
            ],
        },
    }, {
        # SendtoNews embed
        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588',
            'ext': 'mp4',
            'title': 'Recap: CLE 15, CIN 6',
            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
            'upload_date': '20160516',
            'timestamp': 1463433840,
            'duration': 49,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    # Layout of the text inside the page's class="entry-date" element,
    # e.g. "May 16, 2016 11:15 PM" (which yields the timestamp expected by
    # the first test above).
    _ENTRY_DATE_FORMAT = '%b %d, %Y %I:%M %p'

    @staticmethod
    def _parse_entry_date(time_str, date_format):
        """Convert the page's release-date text to a POSIX timestamp.

        The parsed wall-clock time is interpreted as UTC (calendar.timegm).
        NOTE(review): the site presumably prints station-local time, so
        the result may be offset from the true instant - confirm.

        Returns None when time_str is empty or does not match date_format,
        so that an unexpected date layout never aborts the extraction.
        """
        if not time_str:
            return None
        try:
            parsed = datetime.datetime.strptime(time_str, date_format)
        except ValueError:
            # The date is optional metadata (it is searched for with
            # fatal=False), so an unparsable value is treated the same as
            # a missing one. Note that %b and %p are matched according to
            # the current locale.
            return None
        return calendar.timegm(parsed.timetuple())

    def _real_extract(self, url):
        """Extract the video embedded in the article page at url.

        Returns either a 'url_transparent' result pointing at the
        SendtoNews embed found in the page, or the result of the inherited
        Anvato extraction, augmented with 'display_id' and (when
        available) 'timestamp'.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        sendtonews_url = SendtoNewsIE._extract_url(webpage)
        if sendtonews_url:
            info_dict = {
                '_type': 'url_transparent',
                # The embed URL may be relative or scheme-less; resolve it
                # against the page URL.
                'url': compat_urlparse.urljoin(url, sendtonews_url),
            }
        else:
            info_dict = self._extract_anvato_videos(webpage, display_id)

        time_str = self._html_search_regex(
            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
        timestamp = self._parse_entry_date(time_str, self._ENTRY_DATE_FORMAT)

        info_dict['display_id'] = display_id
        # Only set the timestamp when the page actually provided one, so a
        # missing or unparsable date does not overwrite a timestamp that
        # info_dict may already carry with None.
        if timestamp is not None:
            info_dict['timestamp'] = timestamp

        return info_dict
84         return info_dict