_ Git - youtube-dl/blob - youtube_dl/extractor/cwtv.py

   1 # coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
)
   9
  10
  11 class CWTVIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
  13     _TESTS = [{
  14         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
  15         'info_dict': {
  16             'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
  17             'ext': 'mp4',
  18             'title': 'Legends of Yesterday',
  19             'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
  20             'duration': 2665,
  21             'series': 'Arrow',
  22             'season_number': 4,
  23             'season': '4',
  24             'episode_number': 8,
  25             'upload_date': '20151203',
  26             'timestamp': 1449122100,
  27         },
  28         'params': {
  29             # m3u8 download
  30             'skip_download': True,
  31         },
  32         'skip': 'redirect to http://cwtv.com/shows/arrow/',
  33     }, {
  34         'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
  35         'info_dict': {
  36             'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
  37             'ext': 'mp4',
  38             'title': 'Jeff Davis 4',
  39             'description': 'Jeff Davis is back to make you laugh.',
  40             'duration': 1263,
  41             'series': 'Whose Line Is It Anyway?',
  42             'season_number': 11,
  43             'season': '11',
  44             'episode_number': 20,
  45             'upload_date': '20151006',
  46             'timestamp': 1444107300,
  47         },
  48     }, {
  49         'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
  50         'only_matching': True,
  51     }, {
  52         'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e',
  53         'only_matching': True,
  54     }, {
  55         'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
  56         'only_matching': True,
  57     }]
  58
  59     def _real_extract(self, url):
  60         video_id = self._match_id(url)
  61         video_data = None
  62         formats = []
  63         for partner in (154, 213):
  64             vdata = self._download_json(
  65                 'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
  66             if not vdata:
  67                 continue
  68             video_data = vdata
  69             for quality, quality_data in vdata.get('videos', {}).items():
  70                 quality_url = quality_data.get('uri')
  71                 if not quality_url:
  72                     continue
  73                 if quality == 'variantplaylist':
  74                     formats.extend(self._extract_m3u8_formats(
  75                         quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
  76                 else:
  77                     tbr = int_or_none(quality_data.get('bitrate'))
  78                     format_id = 'http' + ('-%d' % tbr if tbr else '')
  79                     if self._is_valid_url(quality_url, video_id, format_id):
  80                         formats.append({
  81                             'format_id': format_id,
  82                             'url': quality_url,
  83                             'tbr': tbr,
  84                         })
  85         video_metadata = video_data['assetFields']
  86         ism_url = video_metadata.get('smoothStreamingUrl')
  87         if ism_url:
  88             formats.extend(self._extract_ism_formats(
  89                 ism_url, video_id, ism_id='mss', fatal=False))
  90         self._sort_formats(formats)
  91
  92         thumbnails = [{
  93             'url': image['uri'],
  94             'width': image.get('width'),
  95             'height': image.get('height'),
  96         } for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
  97
  98         subtitles = {
  99             'en': [{
 100                 'url': video_metadata['UnicornCcUrl'],
 101             }],
 102         } if video_metadata.get('UnicornCcUrl') else None
 103
 104         return {
 105             'id': video_id,
 106             'title': video_metadata['title'],
 107             'description': video_metadata.get('description'),
 108             'duration': int_or_none(video_metadata.get('duration')),
 109             'series': video_metadata.get('seriesName'),
 110             'season_number': int_or_none(video_metadata.get('seasonNumber')),
 111             'season': video_metadata.get('seasonName'),
 112             'episode_number': int_or_none(video_metadata.get('episodeNumber')),
 113             'timestamp': parse_iso8601(video_data.get('startTime')),
 114             'thumbnails': thumbnails,
 115             'formats': formats,
 116             'subtitles': subtitles,
 117         }