_ Git - youtube-dl/blob - youtube_dl/extractor/discovery.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     parse_duration,
   8     parse_iso8601,
   9 )
  10 from ..compat import (
  11     compat_str,
  12     compat_urlparse,
  13 )
  14
  15
  16 class DiscoveryIE(InfoExtractor):
  17     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
  18             discovery|
  19             investigationdiscovery|
  20             discoverylife|
  21             animalplanet|
  22             ahctv|
  23             destinationamerica|
  24             sciencechannel|
  25             tlc|
  26             velocity
  27         )\.com/(?:[^/]+/)*(?P<id>[^./?#]+)'''
  28     _TESTS = [{
  29         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
  30         'info_dict': {
  31             'id': '20769',
  32             'ext': 'mp4',
  33             'title': 'Mission Impossible Outtakes',
  34             'description': ('Watch Jamie Hyneman and Adam Savage practice being'
  35                             ' each other -- to the point of confusing Jamie\'s dog -- and '
  36                             'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
  37                             ' back.'),
  38             'duration': 156,
  39             'timestamp': 1302032462,
  40             'upload_date': '20110405',
  41             'uploader_id': '103207',
  42         },
  43         'params': {
  44             'skip_download': True,  # requires ffmpeg
  45         }
  46     }, {
  47         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
  48         'info_dict': {
  49             'id': 'mythbusters-the-simpsons',
  50             'title': 'MythBusters: The Simpsons',
  51         },
  52         'playlist_mincount': 10,
  53     }, {
  54         'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
  55         'info_dict': {
  56             'id': '78326',
  57             'ext': 'mp4',
  58             'title': 'Longfin Eels: Maneaters?',
  59             'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.',
  60             'upload_date': '20140725',
  61             'timestamp': 1406246400,
  62             'duration': 116,
  63             'uploader_id': '103207',
  64         },
  65         'params': {
  66             'skip_download': True,  # requires ffmpeg
  67         }
  68     }]
  69
  70     def _real_extract(self, url):
  71         display_id = self._match_id(url)
  72         info = self._download_json(url + '?flat=1', display_id)
  73
  74         video_title = info.get('playlist_title') or info.get('video_title')
  75
  76         entries = []
  77
  78         for idx, video_info in enumerate(info['playlist']):
  79             subtitles = []
  80             caption_url = video_info.get('captionsUrl')
  81             if caption_url:
  82                 subtitles = {
  83                     'en': [{
  84                         'url': caption_url,
  85                     }]
  86                 }
  87
  88             entries.append({
  89                 '_type': 'url_transparent',
  90                 'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
  91                 'id': compat_str(video_info['id']),
  92                 'title': video_info['title'],
  93                 'description': video_info.get('description'),
  94                 'duration': parse_duration(video_info.get('video_length')),
  95                 'webpage_url': video_info.get('href') or video_info.get('url'),
  96                 'thumbnail': video_info.get('thumbnailURL'),
  97                 'alt_title': video_info.get('secondary_title'),
  98                 'timestamp': parse_iso8601(video_info.get('publishedDate')),
  99                 'subtitles': subtitles,
 100             })
 101
 102         return self.playlist_result(entries, display_id, video_title)