[acast] Fix extraction
[youtube-dl] / youtube_dl / extractor / acast.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import int_or_none
9
10
class ACastBaseIE(InfoExtractor):
    """Common base for the acast.com extractors in this module."""

    # Root of the acast.com JSON API; subclasses append endpoint paths to it.
    _API_BASE_URL = 'https://www.acast.com/api/'
13
14
class ACastIE(ACastBaseIE):
    """Extractor for a single acast.com episode URL (``/<channel>/<episode>``)."""

    IE_NAME = 'acast'
    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
    _TEST = {
        'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
        'md5': 'ada3de5a1e3a2a381327d749854788bb',
        'info_dict': {
            'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
            'ext': 'mp3',
            'title': '"Where Are You?": Taipei 101, Taiwan',
            # UNIX seconds: publishingDate is reported in milliseconds and
            # is scaled down by _real_extract.
            'timestamp': 1196172000,
            'upload_date': '20071127',
            'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
            'duration': 211,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the episode at *url*.

        The channel and episode slugs are taken from the URL, the matching
        page on the ``embedcdn.acast.com`` host is downloaded, and the
        episode metadata is read from the ``window['acast/queries']`` JSON
        object embedded in that page.

        Raises ``KeyError``/``IndexError`` if the embedded JSON lacks the
        expected ``GetAcast/<channel>/<episode>`` entry, or its ``id``,
        ``name`` or first ``blings`` audio URL.
        """
        channel, display_id = re.match(self._VALID_URL, url).groups()

        # Raw string: '\.' in a plain string literal is an invalid escape
        # sequence and triggers a warning on modern Python versions.
        embed_url = re.sub(
            r'(?:www\.)?acast\.com', 'embedcdn.acast.com', url)
        embed_page = self._download_webpage(embed_url, display_id)

        # NOTE(review): the capture stops at the first ';', so this relies on
        # the embedded JSON not containing a semicolon - confirm.
        queries = self._parse_json(self._search_regex(
            r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
            display_id)
        cast_data = queries['GetAcast/%s/%s' % (channel, display_id)]

        return {
            'id': compat_str(cast_data['id']),
            'display_id': display_id,
            'url': cast_data['blings'][0]['audio'],
            'title': cast_data['name'],
            'description': cast_data.get('description'),
            'thumbnail': cast_data.get('image'),
            # publishingDate is in milliseconds; timestamp must be seconds.
            'timestamp': int_or_none(cast_data.get('publishingDate'), 1000),
            'duration': int_or_none(cast_data.get('duration')),
        }
50
51
class ACastChannelIE(ACastBaseIE):
    """Extractor for an acast.com channel URL, returned as a playlist."""

    IE_NAME = 'acast:channel'
    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
    _TEST = {
        'url': 'https://www.acast.com/condenasttraveler',
        'info_dict': {
            'id': '50544219-29bb-499e-a083-6087f4cb7797',
            'title': 'Condé Nast Traveler Podcast',
            'description': 'md5:98646dee22a5b386626ae31866638fbd',
        },
        'playlist_mincount': 20,
    }

    @classmethod
    def suitable(cls, url):
        """Reject any URL that ACastIE accepts; otherwise use the default check."""
        # ACastIE takes precedence for every URL it considers suitable.
        if ACastIE.suitable(url):
            return False
        return super(ACastChannelIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist of episode URL results for the channel at *url*.

        Channel metadata and the list of casts are fetched from two API
        endpoints under ``_API_BASE_URL``; every cast becomes a URL result
        delegated to the 'ACast' extractor.
        """
        channel_slug = self._match_id(url)

        channel_endpoint = self._API_BASE_URL + 'channels/%s' % channel_slug
        channel_info = self._download_json(channel_endpoint, channel_slug)

        casts_endpoint = self._API_BASE_URL + 'channels/%s/acasts' % channel_slug
        cast_list = self._download_json(casts_endpoint, channel_slug)

        entries = []
        for episode in cast_list:
            episode_url = 'https://www.acast.com/%s/%s' % (channel_slug, episode['url'])
            entries.append(self.url_result(episode_url, 'ACast'))

        return self.playlist_result(
            entries,
            compat_str(channel_info['id']),
            channel_info['name'],
            channel_info.get('description'))