[cwtv] Add new extractor
[youtube-dl] / youtube_dl / extractor / cwtv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     int_or_none,
7     parse_iso8601,
8 )
9
10
class CWTVIE(InfoExtractor):
    """Extractor for episode pages on cwtv.com and cwseed.com.

    Supported URLs have the form ``/shows/<show>/<episode>/?play=<uuid>``;
    the UUID in the ``play`` query parameter is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
    _TESTS = [{
        'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
        'info_dict': {
            'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
            'ext': 'mp4',
            'title': 'Legends of Yesterday',
            'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
            'duration': 2665,
            'series': 'Arrow',
            'season_number': 4,
            'season': '4',
            'episode_number': 8,
            'upload_date': '20151203',
            'timestamp': 1449122100,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
        'info_dict': {
            'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
            'ext': 'mp4',
            'title': 'Jeff Davis 4',
            'description': 'Jeff Davis is back to make you laugh.',
            'duration': 1263,
            'series': 'Whose Line Is It Anyway?',
            'season_number': 11,
            'season': '11',
            'episode_number': 20,
            'upload_date': '20151006',
            'timestamp': 1444107300,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Downloads the asset JSON for the video id, extracts the HLS formats
        from its variant playlist and maps the asset fields onto the
        standard info dict keys. A missing playlist URI, ``assetFields``
        entry or title raises ``KeyError``; all other fields are optional.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id, video_id)

        formats = self._extract_m3u8_formats(
            video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
        # Formats must be handed back ordered from worst to best so that
        # the default format selection picks the best available one.
        self._sort_formats(formats)

        # 'images' is optional; only entries that carry a URI are usable.
        images = video_data.get('images')
        thumbnails = [{
            'url': image['uri'],
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in images.values() if image.get('uri')] if images else None

        video_metadata = video_data['assetFields']

        # Closed captions are exposed as a single English track when present.
        subtitles = {
            'en': [{
                'url': video_metadata['UnicornCcUrl'],
            }],
        } if video_metadata.get('UnicornCcUrl') else None

        return {
            'id': video_id,
            'title': video_metadata['title'],
            'description': video_metadata.get('description'),
            'duration': int_or_none(video_metadata.get('duration')),
            'series': video_metadata.get('seriesName'),
            'season_number': int_or_none(video_metadata.get('seasonNumber')),
            'season': video_metadata.get('seasonName'),
            'episode_number': int_or_none(video_metadata.get('episodeNumber')),
            # Note: startTime is read from the top-level asset, not assetFields.
            'timestamp': parse_iso8601(video_data.get('startTime')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }