[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / contv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     float_or_none,
7     int_or_none,
8 )
9
10
class CONtvIE(InfoExtractor):
    """Extractor for ``contv.com/details-movie/<id>`` pages.

    A page resolves either to a single video or, when the metadata reports
    ``type == 'episodic'``, to a playlist made of the episode pages.
    """

    _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
        'info_dict': {
            'id': 'CEG10022949',
            'ext': 'mp4',
            'title': 'Days Of Thrills & Laughter',
            'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
            'upload_date': '20180703',
            'timestamp': 1530634789.61,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
        'info_dict': {
            'id': 'CLIP-show_fotld_bts',
            'title': 'Fight of the Living Dead: Behind the Scenes Bites',
        },
        'playlist_mincount': 7,
    }]

    def _real_extract(self, url):
        """Build the info dict (or playlist) for a CONtv details page.

        The id matched by ``_VALID_URL`` is used to fetch the ``details``
        JSON document.  If that document is marked ``episodic`` the series
        feed is fetched as well and a playlist of episode page URLs is
        returned; otherwise a single-video info dict is assembled from the
        ``details`` sub-object.

        Raises ``KeyError`` when the metadata lacks ``details`` or ``title``
        for a non-episodic item.
        """
        video_id = self._match_id(url)
        details = self._download_json(
            'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
            video_id, query={'device': 'web'})

        if details.get('type') == 'episodic':
            seasons = self._download_json(
                'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
                video_id)
            entries = []
            for season in seasons:
                # ``or []`` instead of a ``.get`` default: the default is not
                # applied when the key is present with a null value.
                for episode in season.get('episodes') or []:
                    episode_id = episode.get('id')
                    if not episode_id:
                        continue
                    # Each episode is re-dispatched to this extractor through
                    # its own details-movie URL.
                    entries.append(self.url_result(
                        'https://www.contv.com/details-movie/' + episode_id,
                        CONtvIE.ie_key(), episode_id))
            return self.playlist_result(entries, video_id, details.get('title'))

        # Mandatory fields: fail loudly if the API response lacks them.
        m_details = details['details']
        title = details['title']

        formats = []

        media_hls_url = m_details.get('media_hls_url')
        if media_hls_url:
            # Non-fatal so that a broken manifest does not prevent falling
            # back to the progressive MP4 below.
            formats.extend(self._extract_m3u8_formats(
                media_hls_url, video_id, 'mp4',
                m3u8_id='hls', fatal=False))

        media_mp4_url = m_details.get('media_mp4_url')
        if media_mp4_url:
            formats.append({
                'format_id': 'http',
                'url': media_mp4_url,
            })

        self._sort_formats(formats)

        subtitles = {}
        captions = m_details.get('captions') or {}
        for caption_url in captions.values():
            # Skip empty/null entries; a subtitle without a URL is unusable.
            if not caption_url:
                continue
            # All captions are filed under 'en'; the mapping keys are ignored.
            subtitles.setdefault('en', []).append({
                'url': caption_url
            })

        thumbnails = []
        # ``or []`` guards against an explicit null, same as ``captions``.
        for image in m_details.get('images') or []:
            image_url = image.get('url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            })

        # Use the first non-empty description, trying the prefixed variants
        # in the order large, medium, small before the plain key.
        description = None
        for p in ('large_', 'medium_', 'small_', ''):
            d = m_details.get(p + 'description')
            if d:
                description = d
                break

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': description,
            # NOTE(review): scale argument 1000 presumably converts
            # milliseconds to seconds - confirm against the API.
            'timestamp': float_or_none(details.get('metax_added_on'), 1000),
            'subtitles': subtitles,
            'duration': float_or_none(m_details.get('duration'), 1000),
            'view_count': int_or_none(details.get('num_watched')),
            'like_count': int_or_none(details.get('num_fav')),
            'categories': details.get('category'),
            'tags': details.get('tags'),
            'season_number': int_or_none(details.get('season')),
            'episode_number': int_or_none(details.get('episode')),
            'release_year': int_or_none(details.get('pub_year')),
        }