Move tests to the IE definitions
[youtube-dl] / youtube_dl / extractor / rbmaradio.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6     compat_urllib_parse_urlparse,
7
8     ExtractorError,
9 )
10
11
class RBMARadioIE(InfoExtractor):
    """Information extractor for show pages on rbmaradio.com.

    A show page embeds a ``gon.show=<json>;`` assignment; the JSON object
    found there supplies the media URL and the metadata returned by
    ``_real_extract``.
    """

    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
    _TEST = {
        u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
        u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
        u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
        u'info_dict': {
            u"uploader_id": u"ford-lopatin",
            u"location": u"Spain",
            u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
            u"uploader": u"Ford & Lopatin",
            u"title": u"Live at Primavera Sound 2011"
        }
    }

    def _real_extract(self, url):
        """Extract the media URL and metadata for a single show page.

        Args:
            url: Show page URL; expected to match ``_VALID_URL``.

        Returns:
            A one-element list containing the info dictionary.

        Raises:
            ExtractorError: If the embedded show data is not valid JSON.
            KeyError: If the show data lacks ``akamai_url`` or ``title``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        webpage = self._download_webpage(url, video_id)

        # The show metadata is assigned to ``gon.show`` inside the page;
        # MULTILINE lets ``$`` anchor at the end of that script line.
        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
            webpage, u'json data', flags=re.MULTILINE)

        try:
            data = json.loads(json_data)
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + str(e))

        # NOTE(review): '&cbr=256' presumably selects the 256 kbps stream;
        # confirm against the service.
        video_url = data['akamai_url'] + '&cbr=256'
        url_parts = compat_urllib_parse_urlparse(video_url)
        # The extension is whatever follows the last '.' in the URL path.
        video_ext = url_parts.path.rpartition('.')[2]

        # A JSON ``null`` decodes to None, in which case ``data.get(key, {})``
        # returns None rather than the default; ``or {}`` keeps the nested
        # lookups below from raising AttributeError on optional metadata.
        host = data.get('host') or {}
        image = data.get('image') or {}

        info = {
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': data['title'],
            'description': data.get('teaser_text'),
            'location': data.get('country_of_origin'),
            'uploader': host.get('name'),
            'uploader_id': host.get('slug'),
            'thumbnail': image.get('large_url_2x'),
            'duration': data.get('duration'),
        }
        return [info]