_ Git - youtube-dl/blob - youtube_dl/extractor/tbs.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .turner import TurnerBaseIE
   7 from ..utils import (
   8     float_or_none,
   9     int_or_none,
  10     strip_or_none,
  11 )
  12
  13
  14 class TBSIE(TurnerBaseIE):
  15     _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
  16     _TESTS = [{
  17         'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
  18         'info_dict': {
  19             'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
  20             'ext': 'mp4',
  21             'title': 'Monster',
  22             'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
  23             'timestamp': 1508175329,
  24             'upload_date': '20171016',
  25         },
  26         'params': {
  27             # m3u8 download
  28             'skip_download': True,
  29         }
  30     }, {
  31         'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
  32         'only_matching': True,
  33     }, {
  34         'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
  35         'only_matching': True,
  36     }]
  37
  38     def _real_extract(self, url):
  39         domain, display_id = re.match(self._VALID_URL, url).groups()
  40         site = domain[:3]
  41         webpage = self._download_webpage(url, display_id)
  42         video_data = self._parse_json(self._search_regex(
  43             r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
  44             webpage, 'drupal setting'), display_id)['turner_playlist'][0]
  45
  46         media_id = video_data['mediaID']
  47         title = video_data['title']
  48
  49         streams_data = self._download_json(
  50             'http://medium.ngtv.io/media/%s/tv' % media_id,
  51             media_id)['media']['tv']
  52         duration = None
  53         chapters = []
  54         formats = []
  55         for supported_type in ('unprotected', 'bulkaes'):
  56             stream_data = streams_data.get(supported_type, {})
  57             m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
  58             if not m3u8_url:
  59                 continue
  60             if stream_data.get('playlistProtection') == 'spe':
  61                 m3u8_url = self._add_akamai_spe_token(
  62                     'http://www.%s.com/service/token_spe' % site,
  63                     m3u8_url, media_id, {
  64                         'url': url,
  65                         'site_name': site.upper(),
  66                         'auth_required': video_data.get('authRequired') == '1',
  67                     })
  68             formats.extend(self._extract_m3u8_formats(
  69                 m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
  70
  71             duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
  72
  73             if not chapters:
  74                 for chapter in stream_data.get('contentSegments', []):
  75                     start_time = float_or_none(chapter.get('start'))
  76                     duration = float_or_none(chapter.get('duration'))
  77                     if start_time is None or duration is None:
  78                         continue
  79                     chapters.append({
  80                         'start_time': start_time,
  81                         'end_time': start_time + duration,
  82                     })
  83         self._sort_formats(formats)
  84
  85         thumbnails = []
  86         for image_id, image in video_data.get('images', {}).items():
  87             image_url = image.get('url')
  88             if not image_url or image.get('type') != 'video':
  89                 continue
  90             i = {
  91                 'id': image_id,
  92                 'url': image_url,
  93             }
  94             mobj = re.search(r'(\d+)x(\d+)', image_url)
  95             if mobj:
  96                 i.update({
  97                     'width': int(mobj.group(1)),
  98                     'height': int(mobj.group(2)),
  99                 })
 100             thumbnails.append(i)
 101
 102         return {
 103             'id': media_id,
 104             'title': title,
 105             'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
 106             'duration': duration,
 107             'timestamp': int_or_none(video_data.get('created')),
 108             'season_number': int_or_none(video_data.get('season')),
 109             'episode_number': int_or_none(video_data.get('episode')),
 110             'cahpters': chapters,
 111             'thumbnails': thumbnails,
 112             'formats': formats,
 113         }