_ Git - youtube-dl/blob - youtube_dl/extractor/thisamericanlife.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5
   6
   7 class ThisAmericanLifeIE(InfoExtractor):
   8     _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/radio-archives/episode/(?P<id>\d+)'
   9     _TEST = {
  10         'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
  11         'md5': '5cda28076c9f9d1fd0b0f5cff5959948',
  12         'info_dict': {
  13             'id': '487',
  14             'title': '487: Harper High School, Part One',
  15             'url' : 'http://stream.thisamericanlife.org/487/stream/487_64k.m3u8',
  16             'ext': 'aac',
  17         }
  18     }
  19
  20     def _real_extract(self, url):
  21         video_id = self._match_id(url)
  22         webpage = self._download_webpage(url, video_id)
  23
  24         title = self._html_search_regex(r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
  25         media_url = 'http://stream.thisamericanlife.org/' + video_id + '/stream/' + video_id + '_64k.m3u8'
  26
  27         return {
  28             'id': video_id,
  29             'title': title,
  30             'url': media_url,
  31             'ext': 'aac',
  32         }