_ Git - youtube-dl/blob - youtube_dl/extractor/tunein.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5 import re
   6
   7 from .common import InfoExtractor
   8 from ..utils import ExtractorError
   9
  10
  11 class TuneInIE(InfoExtractor):
  12     _VALID_URL = r'''(?x)https?://(?:www\.)?
  13     (?:
  14         tunein\.com/
  15         (?:
  16             radio/.*?-s|
  17             station/.*?StationId\=
  18         )(?P<id>[0-9]+)
  19         |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
  20     )
  21     '''
  22
  23     _INFO_DICT = {
  24         'id': '34682',
  25         'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
  26         'ext': 'AAC',
  27         'thumbnail': 're:^https?://.*\.png$',
  28         'location': 'Tacoma, WA',
  29     }
  30     _TESTS = [
  31         {
  32             'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
  33             'info_dict': _INFO_DICT,
  34             'params': {
  35                 'skip_download': True,  # live stream
  36             },
  37         },
  38         {  # test redirection
  39             'url': 'http://tun.in/ser7s',
  40             'info_dict': _INFO_DICT,
  41             'params': {
  42                 'skip_download': True,  # live stream
  43             },
  44         },
  45     ]
  46
  47     def _real_extract(self, url):
  48         mobj = re.match(self._VALID_URL, url)
  49         redirect_id = mobj.group('redirect_id')
  50         if redirect_id:
  51             # The server doesn't support HEAD requests
  52             urlh = self._request_webpage(
  53                 url, redirect_id, note='Downloading redirect page')
  54             url = urlh.geturl()
  55             self.to_screen('Following redirect: %s' % url)
  56             mobj = re.match(self._VALID_URL, url)
  57         station_id = mobj.group('id')
  58
  59         webpage = self._download_webpage(
  60             url, station_id, note='Downloading station webpage')
  61
  62         payload = self._html_search_regex(
  63             r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
  64         json_data = json.loads(payload)
  65         station_info = json_data['Station']['broadcast']
  66         title = station_info['Title']
  67         thumbnail = station_info.get('Logo')
  68         location = station_info.get('Location')
  69         streams_url = station_info.get('StreamUrl')
  70         if not streams_url:
  71             raise ExtractorError('No downloadable streams found',
  72                                  expected=True)
  73         stream_data = self._download_webpage(
  74             streams_url, station_id, note='Downloading stream data')
  75         streams = json.loads(self._search_regex(
  76             r'\((.*)\);', stream_data, 'stream info'))['Streams']
  77
  78         is_live = None
  79         formats = []
  80         for stream in streams:
  81             if stream.get('Type') == 'Live':
  82                 is_live = True
  83             formats.append({
  84                 'abr': stream.get('Bandwidth'),
  85                 'ext': stream.get('MediaType'),
  86                 'acodec': stream.get('MediaType'),
  87                 'vcodec': 'none',
  88                 'url': stream.get('Url'),
  89                 # Sometimes streams with the highest quality do not exist
  90                 'preference': stream.get('Reliability'),
  91             })
  92         self._sort_formats(formats)
  93
  94         return {
  95             'id': station_id,
  96             'title': title,
  97             'formats': formats,
  98             'thumbnail': thumbnail,
  99             'location': location,
 100             'is_live': is_live,
 101         }