[awaan] fix extraction, modernize, rename the extractors and add test for live stream
[youtube-dl] / youtube_dl / extractor / awaan.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import base64
6
7 from .common import InfoExtractor
8 from ..compat import (
9     compat_urllib_parse_urlencode,
10     compat_str,
11 )
12 from ..utils import (
13     int_or_none,
14     parse_iso8601,
15     smuggle_url,
16     unsmuggle_url,
17     urlencode_postdata,
18 )
19
20
class AWAANIE(InfoExtractor):
    # Dispatcher for ``/show/...`` URLs: performs no extraction itself and
    # only hands the request over to the video or season extractor.
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        """Redirect a show URL to the most specific extractor available.

        A positive video id wins over a positive season id; when neither is
        present (or both are zero) the whole show is delegated to the season
        extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('show_id')
        video_id = mobj.group('video_id')
        season_id = mobj.group('season_id')

        if video_id and int(video_id) > 0:
            target = 'http://awaan.ae/media/%s' % video_id
            ie_key = 'AWAANVideo'
        elif season_id and int(season_id) > 0:
            # The season URL carries no show id, so smuggle it along to spare
            # the season extractor an extra lookup.
            target = smuggle_url(
                'http://awaan.ae/program/season/%s' % season_id,
                {'show_id': show_id})
            ie_key = 'AWAANSeason'
        else:
            target = 'http://awaan.ae/program/%s' % show_id
            ie_key = 'AWAANSeason'

        return self.url_result(target, ie_key)
36
37
class AWAANBaseIE(InfoExtractor):
    # Helpers shared by the on-demand video and live channel extractors.

    def _parse_video_data(self, video_data, video_id, is_live):
        """Build the metadata part of an info dict from an API response.

        The English title/description is preferred, with the Arabic one as
        fallback; a missing Arabic title raises KeyError. Formats are not
        included here.
        """
        title = video_data.get('title_en') or video_data['title_ar']
        if is_live:
            title = self._live_title(title)

        image = video_data.get('img')
        thumbnail = None
        if image:
            thumbnail = 'http://admin.mangomolo.com/analytics/%s' % image

        description = (
            video_data.get('description_en')
            or video_data.get('description_ar'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
        }

    def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
        """Collect DASH, HLS and HDS formats referenced by an embed page.

        The stream location is taken from the player's playlist.m3u8 entry
        or, failing that, from an rtsp link; the scheme is replaced by plain
        http and each manifest type is probed non-fatally.
        """
        stream_location = self._html_search_regex(
            [
                r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
                r'<a[^>]+href="rtsp(://[^"]+)"'
            ], webpage, 'format url')
        base_url = 'http' + stream_location

        dash_formats = self._extract_mpd_formats(
            base_url + '/manifest.mpd',
            video_id, mpd_id='dash', fatal=False)
        hls_formats = self._extract_m3u8_formats(
            base_url + '/playlist.m3u8', video_id, 'mp4',
            m3u8_entry_protocol, m3u8_id='hls', fatal=False)
        hds_formats = self._extract_f4m_formats(
            base_url + '/manifest.f4m',
            video_id, f4m_id='hds', fatal=False)

        formats = []
        for batch in (dash_formats, hls_formats, hds_formats):
            formats.extend(batch)
        self._sort_formats(formats)
        return formats
71
72
class AWAANVideoIE(AWAANBaseIE):
    # Extractor for single on-demand videos (video, media and catchup URLs).
    IE_NAME = 'awaan:video'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'md5': '5f61c33bfc7794315c671a62d43116aa',
        'info_dict': {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
    }, {
        'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video metadata, then the embed page carrying the streams."""
        video_id = self._match_id(url)

        api_url = 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id
        video_data = self._download_json(
            api_url, video_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(video_data, video_id, False)

        # The embed page is addressed with identifiers and a signature taken
        # from the metadata response.
        embed_query = compat_urllib_parse_urlencode({
            'id': video_data['id'],
            'user_id': video_data['user_id'],
            'signature': video_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + embed_query
        webpage = self._download_webpage(embed_url, video_id)

        info['formats'] = self._extract_video_formats(
            webpage, video_id, 'm3u8_native')
        return info
113
114
class AWAANLiveIE(AWAANBaseIE):
    # Extractor for live channel pages.
    IE_NAME = 'awaan:live'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
    _TEST = {
        'url': 'http://awaan.ae/live/6/dubai-tv',
        'info_dict': {
            'id': '6',
            'ext': 'mp4',
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the channel details, then the embed page carrying the streams."""
        channel_id = self._match_id(url)

        def to_base64(value):
            # The live embed endpoint takes these identifiers base64-encoded.
            return base64.b64encode(value.encode()).decode()

        details_url = 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id
        channel_data = self._download_json(
            details_url, channel_id, headers={'Origin': 'http://awaan.ae'})
        info = self._parse_video_data(channel_data, channel_id, True)

        encoded_user_id = to_base64(channel_data['user_id'])
        encoded_channel_id = to_base64(channel_data['id'])
        embed_query = compat_urllib_parse_urlencode({
            'id': encoded_user_id,
            'channelid': encoded_channel_id,
            'signature': channel_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + embed_query
        webpage = self._download_webpage(embed_url, channel_id)

        info['formats'] = self._extract_video_formats(
            webpage, channel_id, 'm3u8')
        return info
152
153
class AWAANSeasonIE(InfoExtractor):
    # Playlist extractor for whole programs and for individual seasons.
    IE_NAME = 'awaan:season'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict':
        {
            'id': '7910',
            'title': 'محاضرات الشيخ الشعراوي',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        """Return a playlist with the videos of a season.

        For a program URL the show's default season is used. For a season
        URL the show id is taken from the smuggled data when present and
        looked up through the season_info endpoint otherwise.

        The playlist is returned even when the selected season is not listed
        in the show's season list; it then simply has no title, instead of
        the method silently yielding nothing.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        show_id, season_id = re.match(self._VALID_URL, url).groups()

        data = {}
        if season_id:
            data['season'] = season_id
            show_id = smuggled_data.get('show_id')
            if show_id is None:
                season = self._download_json(
                    'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
                    season_id, headers={'Origin': 'http://awaan.ae'})
                show_id = season['id']
        data['show_id'] = show_id
        show = self._download_json(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            show_id, data=urlencode_postdata(data), headers={
                'Origin': 'http://awaan.ae',
                'Content-Type': 'application/x-www-form-urlencoded'
            })
        if not season_id:
            season_id = show['default_season']
        # Normalise to text so that an id delivered as a number by the API
        # still compares equal to one taken from the URL.
        if season_id is not None:
            season_id = compat_str(season_id)

        # The season list is only needed to find the playlist title.
        title = None
        for season in show['seasons']:
            if compat_str(season['id']) == season_id:
                title = season.get('title_en') or season['title_ar']
                break

        entries = []
        for video in show['videos']:
            video_id = compat_str(video['id'])
            entries.append(self.url_result(
                'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))

        return self.playlist_result(entries, season_id, title)