[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / tunein.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import ExtractorError
8 from ..compat import compat_urlparse
9
10
class TuneInBaseIE(InfoExtractor):
    """Common extraction logic shared by the TuneIn extractors.

    Subclasses are expected to provide ``_VALID_URL`` (with an ``id`` group)
    and ``_API_URL_QUERY``, a ``%s`` template that receives the content id
    and is appended to ``_API_BASE_URL``.
    """

    _API_BASE_URL = 'http://tunein.com/tuner/tune/'

    @staticmethod
    def _extract_urls(webpage):
        """Return the TuneIn embed player URLs found in iframes of *webpage*.

        Only program (``p``), station (``s``) and topic (``t``) player URLs
        are matched. The result is a list of URL strings, empty when the
        page embeds none.
        """
        return re.findall(
            r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)',
            webpage)

    def _real_extract(self, url):
        """Download the metadata and stream list for *url* and build the info dict.

        Raises ExtractorError when the metadata carries no ``StreamUrl``.
        """
        content_id = self._match_id(url)

        content_info = self._download_json(
            self._API_BASE_URL + self._API_URL_QUERY % content_id,
            content_id, note='Downloading JSON metadata')

        title = content_info['Title']
        thumbnail = content_info.get('Logo')
        location = content_info.get('Location')
        streams_url = content_info.get('StreamUrl')
        if not streams_url:
            raise ExtractorError('No downloadable streams found', expected=True)
        if not streams_url.startswith('http://'):
            # Resolve relative (or otherwise non-http) references against
            # the page URL
            streams_url = compat_urlparse.urljoin(url, streams_url)

        # The stream list may be wrapped as "( ... );"; strip that wrapper
        # before the text is parsed as JSON
        streams = self._download_json(
            streams_url, content_id, note='Downloading stream data',
            transform_source=lambda s: re.sub(r'^\s*\((.*)\);\s*$', r'\1', s))['Streams']

        is_live = None
        formats = []
        for stream in streams:
            if stream.get('Type') == 'Live':
                is_live = True
            stream_url = stream.get('Url')
            if not stream_url:
                # An entry without a URL cannot be downloaded; do not turn
                # it into a format
                continue
            reliability = stream.get('Reliability')
            # MediaType may be missing; read it once and guard .lower()
            # instead of crashing with AttributeError on None
            media_type = stream.get('MediaType')
            format_note = (
                'Reliability: %d%%' % reliability
                if reliability is not None else None)
            formats.append({
                # NOTE(review): streams with reliability <= 90 receive the
                # larger value here - confirm this ordering is intended
                'preference': (
                    0 if reliability is None or reliability > 90
                    else 1),
                'abr': stream.get('Bandwidth'),
                'ext': media_type.lower() if media_type else None,
                'acodec': media_type,
                'vcodec': 'none',
                'url': stream_url,
                'source_preference': reliability,
                'format_note': format_note,
            })
        self._sort_formats(formats)

        return {
            'id': content_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'thumbnail': thumbnail,
            'location': location,
            'is_live': is_live,
        }
71
72
class TuneInClipIE(TuneInBaseIE):
    """Extractor for TuneIn audio clip URLs."""

    IE_NAME = 'tunein:clip'

    # The clip id is the numeric value of the audioClipId query parameter
    _VALID_URL = r'https?://(?:www\.)?tunein\.com/station/.*?audioClipId\=(?P<id>\d+)'

    # Query template appended to the API base URL; %s receives the clip id
    _API_URL_QUERY = '?tuneType=AudioClip&audioclipId=%s'

    _TESTS = [
        {
            'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816',
            'md5': '99f00d772db70efc804385c6b47f4e77',
            'info_dict': {
                'id': '816',
                'title': '32m',
                'ext': 'mp3',
            },
        },
    ]
87
88
class TuneInStationIE(TuneInBaseIE):
    """Extractor for TuneIn station URLs, including embedded station players."""

    IE_NAME = 'tunein:station'

    # Accepts /radio/...-s<id>, /station/...StationId=<id> and
    # /embed/player/s<id> forms
    _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId=|embed/player/s)(?P<id>\d+)'

    # Query template appended to the API base URL; %s receives the station id
    _API_URL_QUERY = '?tuneType=Station&stationId=%s'

    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
            'info_dict': {
                'id': '34682',
                'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
                'ext': 'mp3',
                'location': 'Tacoma, WA',
            },
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {
            'url': 'http://tunein.com/embed/player/s6404/',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        Clip URLs also live under /station/, so anything the clip
        extractor accepts is rejected here and left to it.
        """
        if TuneInClipIE.suitable(url):
            return False
        return super(TuneInStationIE, cls).suitable(url)
113
114
class TuneInProgramIE(TuneInBaseIE):
    """Extractor for TuneIn program URLs, including embedded program players."""

    IE_NAME = 'tunein:program'

    # Accepts /radio/...-p<id>, /program/...ProgramId=<id> and
    # /embed/player/p<id> forms
    _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId=|embed/player/p)(?P<id>\d+)'

    # Query template appended to the API base URL; %s receives the program id
    _API_URL_QUERY = '?tuneType=Program&programId=%s'

    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz-24-p2506/',
            'info_dict': {
                'id': '2506',
                'title': 'Jazz 24 on 91.3 WUKY-HD3',
                'ext': 'mp3',
                'location': 'Lexington, KY',
            },
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {
            'url': 'http://tunein.com/embed/player/p191660/',
            'only_matching': True,
        },
    ]
135
136
class TuneInTopicIE(TuneInBaseIE):
    """Extractor for TuneIn topic URLs, including embedded topic players."""

    IE_NAME = 'tunein:topic'

    # Accepts /topic/...TopicId=<id> and /embed/player/t<id> forms
    _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:topic/.*?TopicId=|embed/player/t)(?P<id>\d+)'

    # Query template appended to the API base URL; %s receives the topic id
    _API_URL_QUERY = '?tuneType=Topic&topicId=%s'

    _TESTS = [
        {
            'url': 'http://tunein.com/topic/?TopicId=101830576',
            'md5': 'c31a39e6f988d188252eae7af0ef09c9',
            'info_dict': {
                'id': '101830576',
                'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)',
                'ext': 'mp3',
                'location': 'Belgium',
            },
        },
        {
            'url': 'http://tunein.com/embed/player/t101830576/',
            'only_matching': True,
        },
    ]
155
156
class TuneInShortenerIE(InfoExtractor):
    """Resolves tun.in short links to the URL they redirect to."""

    IE_NAME = 'tunein:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://tun\.in/(?P<id>[A-Za-z0-9]+)'

    _TEST = {
        # test redirection
        'url': 'http://tun.in/ser7s',
        'info_dict': {
            'id': '34682',
            'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
            'ext': 'mp3',
            'location': 'Tacoma, WA',
        },
        'params': {
            'skip_download': True,  # live stream
        },
    }

    def _real_extract(self, url):
        """Request the short link and return a url_result for its final URL."""
        short_id = self._match_id(url)
        # The server doesn't support HEAD requests, so the page itself is
        # requested and the final URL is read from the response
        response = self._request_webpage(
            url, short_id, note='Downloading redirect page')
        target_url = response.geturl()
        self.to_screen('Following redirect: %s' % target_url)
        return self.url_result(target_url)