Merge remote-tracking branch 'origin/master'
[youtube-dl] / youtube_dl / extractor / tunein.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6
7 from .common import InfoExtractor
8 from ..utils import ExtractorError
9
10
class TuneInIE(InfoExtractor):
    """Information extractor for TuneIn radio stations.

    Accepts station pages on tunein.com as well as tun.in short links; short
    links are resolved by following the redirect before extraction starts.
    """

    # Verbose regex with two alternatives:
    #   * tunein.com/radio/<slug>-s<digits> or
    #     tunein.com/station/...StationId=<digits>: the digits are captured
    #     in the ``id`` group;
    #   * tun.in/<code>: the short-link code is captured in ``redirect_id``.
    # Exactly one of the two groups is set for any matching URL.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
    (?:
        tunein\.com/
        (?:
            radio/.*?-s|
            station/.*?StationId\=
        )(?P<id>[0-9]+)
        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
    )
    '''

    # Expected metadata shared by both test cases below (the short link is
    # expected to resolve to the same station as the direct URL).
    _INFO_DICT = {
        'id': '34682',
        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
        'ext': 'AAC',
        # Raw string so that ``\.`` is not treated as an (invalid) string
        # escape sequence; the resulting value is unchanged.
        'thumbnail': r're:^https?://.*\.png$',
        'location': 'Tacoma, WA',
    }
    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {  # test redirection
            'url': 'http://tun.in/ser7s',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
    ]

    def _real_extract(self, url):
        """Extract station metadata and audio formats for a TuneIn URL.

        For tun.in short links the redirect is followed first and the
        resulting URL is matched again to obtain the station id. The station
        page is then downloaded, the JSON object assigned to
        ``TuneIn.payload`` is parsed, and the stream list referenced by its
        ``StreamUrl`` entry is fetched and converted into format entries.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnail``, ``location`` and ``is_live``.

        Raises ExtractorError if a short link redirects to a URL that does
        not identify a station, or if the station has no ``StreamUrl``.
        """
        mobj = re.match(self._VALID_URL, url)
        redirect_id = mobj.group('redirect_id')
        if redirect_id:
            # The server doesn't support HEAD requests
            urlh = self._request_webpage(
                url, redirect_id, note='Downloading redirect page')
            url = urlh.geturl()
            self.to_screen('Following redirect: %s' % url)
            mobj = re.match(self._VALID_URL, url)
            # The redirect target is outside our control: it may not match
            # _VALID_URL at all, or may be another short link without a
            # station id. Fail with a clear error instead of an
            # AttributeError or a None station id further down.
            if mobj is None or not mobj.group('id'):
                raise ExtractorError(
                    'Unsupported redirect target: %s' % url)
        station_id = mobj.group('id')

        webpage = self._download_webpage(
            url, station_id, note='Downloading station webpage')

        # The station page embeds its data as a JavaScript assignment,
        # "TuneIn.payload = {...}", which the regex captures up to the end
        # of that line.
        payload = self._html_search_regex(
            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
        json_data = json.loads(payload)
        station_info = json_data['Station']['broadcast']
        title = station_info['Title']
        thumbnail = station_info.get('Logo')
        location = station_info.get('Location')
        streams_url = station_info.get('StreamUrl')
        if not streams_url:
            raise ExtractorError('No downloadable streams found',
                                 expected=True)
        stream_data = self._download_webpage(
            streams_url, station_id, note='Downloading stream data')
        # The response wraps the JSON document in a call expression,
        # "<something>(...);", so only the parenthesised part is parsed.
        streams = json.loads(self._search_regex(
            r'\((.*)\);', stream_data, 'stream info'))['Streams']

        is_live = None
        formats = []
        for stream in streams:
            if stream.get('Type') == 'Live':
                is_live = True
            stream_url = stream.get('Url')
            if not stream_url:
                # An entry without a URL cannot be downloaded; do not emit
                # a format whose 'url' is None.
                continue
            media_type = stream.get('MediaType')
            formats.append({
                'abr': stream.get('Bandwidth'),
                'ext': media_type,
                'acodec': media_type,
                'vcodec': 'none',
                'url': stream_url,
                # Sometimes streams with the highest quality do not exist
                'preference': stream.get('Reliability'),
            })
        self._sort_formats(formats)

        return {
            'id': station_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'location': location,
            'is_live': is_live,
        }