_ Git - youtube-dl/blob - youtube_dl/extractor/huffpost.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     parse_duration,
   8     unified_strdate,
   9 )
  10
  11
  12 class HuffPostIE(InfoExtractor):
  13     IE_DESC = 'Huffington Post'
  14     _VALID_URL = r'''(?x)
  15         https?://(embed\.)?live\.huffingtonpost\.com/
  16         (?:
  17             r/segment/[^/]+/|
  18             HPLEmbedPlayer/\?segmentId=
  19         )
  20         (?P<id>[0-9a-f]+)'''
  21
  22     _TEST = {
  23         'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
  24         'file': '52dd3e4b02a7602131000677.mp4',
  25         'md5': 'TODO',
  26         'info_dict': {
  27             'title': 'TODO',
  28             'description': 'TODO',
  29             'duration': 1549,
  30         }
  31     }
  32
  33     def _real_extract(self, url):
  34         mobj = re.match(self._VALID_URL, url)
  35         video_id = mobj.group('id')
  36
  37         api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
  38         data = self._download_json(api_url, video_id)['data']
  39
  40         video_title = data['title']
  41         duration = parse_duration(data['running_time'])
  42         upload_date = unified_strdate(data['schedule']['started_at'])
  43
  44         thumbnails = []
  45         for url in data['images'].values():
  46             m = re.match('.*-([0-9]+x[0-9]+)\.', url)
  47             if not m:
  48                 continue
  49             thumbnails.append({
  50                 'url': url,
  51                 'resolution': m.group(1),
  52             })
  53
  54         formats = [{
  55             'format': key,
  56             'format_id': key.replace('/', '.'),
  57             'ext': 'mp4',
  58             'url': url,
  59             'vcodec': 'none' if key.startswith('audio/') else None,
  60         } for key, url in data['sources']['live'].items()]
  61         self._sort_formats(formats)
  62
  63         return {
  64             'id': video_id,
  65             'title': video_title,
  66             'formats': formats,
  67             'duration': duration,
  68             'upload_date': upload_date,
  69             'thumbnails': thumbnails,
  70         }