[thisamericanlife] Add a new extractor
[youtube-dl] / youtube_dl / extractor / thisamericanlife.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class ThisAmericanLifeIE(InfoExtractor):
    """Extractor for thisamericanlife.org radio-archive episode pages.

    The numeric episode id is taken from the page URL and used to build the
    address of the episode's 64k ``.m3u8`` stream; the title is read from the
    first ``<h1>`` element of the episode page.
    """

    _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/radio-archives/episode/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
        'md5': '5cda28076c9f9d1fd0b0f5cff5959948',
        'info_dict': {
            'id': '487',
            'title': '487: Harper High School, Part One',
            'url': 'http://stream.thisamericanlife.org/487/stream/487_64k.m3u8',
            'ext': 'aac',
        }
    }

    # Template of the stream playlist; both placeholders take the episode id.
    _STREAM_URL_TEMPLATE = 'http://stream.thisamericanlife.org/%s/stream/%s_64k.m3u8'

    def _real_extract(self, url):
        """Return the info dict for the episode page at ``url``.

        The episode id comes from the ``id`` group of ``_VALID_URL``. The
        page is downloaded only to obtain the title; the media URL is derived
        from the id alone.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h1[^>]*>(.*?)</h1>', webpage, 'title')

        return {
            'id': video_id,
            'title': title,
            'url': self._STREAM_URL_TEMPLATE % (video_id, video_id),
            'ext': 'aac',
            # Audio-only stream: 'none' is the youtube-dl convention for
            # "no video track", so format selection treats it as audio.
            'vcodec': 'none',
        }