[dcn] add show extraction and support for other types of urls
[youtube-dl] / youtube_dl / extractor / dcn.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import (
8     compat_urllib_parse,
9     compat_urllib_request,
10 )
11 from ..utils import (
12     int_or_none,
13     parse_iso8601,
14 )
15
16
class DCNGeneralIE(InfoExtractor):
    """Redirect dcndigital.ae ``show/...`` URLs to the matching DCN extractor.

    A ``show`` URL always carries a show id and may additionally carry a
    video id and a season id.  This extractor performs no network access; it
    only rewrites the URL and names the extractor that should handle it.
    """
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        """Return a ``url``-type result pointing at the rewritten DCN URL.

        Precedence: a positive video id selects ``DCNVideo``; otherwise a
        positive season id selects the season form of ``DCNShow``; otherwise
        the plain show form of ``DCNShow`` is used.
        """
        match = re.match(self._VALID_URL, url)
        show_id = match.group('show_id')
        video_id = match.group('video_id')
        season_id = match.group('season_id')

        # The ids are optional in the URL and are ignored when they are zero.
        if video_id and int(video_id) > 0:
            extractor = 'DCNVideo'
            target = 'http://www.dcndigital.ae/#/media/%s' % video_id
        elif season_id and int(season_id) > 0:
            extractor = 'DCNShow'
            target = 'http://www.dcndigital.ae/#/program/season/%s' % season_id
        else:
            extractor = 'DCNShow'
            target = 'http://www.dcndigital.ae/#/program/%s' % show_id

        return {
            '_type': 'url',
            'url': target,
            'ie_key': extractor,
        }
38
39
class DCNVideoIE(InfoExtractor):
    """Extract a single video from dcndigital.ae ``video/<slug>/<id>`` or ``media/<id>`` URLs.

    Metadata is read from the mangomolo ``plus/video`` JSON endpoint; the
    stream URLs are scraped from the mangomolo embed page that is requested
    with fields taken from that metadata.
    """
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'info_dict':
        {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            # Raw string: ``\.`` is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict (metadata plus formats) for the video in *url*.

        The title and the ``id``/``user_id``/``signature`` fields of the JSON
        payload are mandatory (a missing key raises ``KeyError``); the
        description, thumbnail, duration and timestamp are optional.
        """
        video_id = self._match_id(url)

        # NOTE(review): the Origin header is presumably required by the API;
        # confirm before removing it.
        request = compat_urllib_request.Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
            headers={'Origin': 'http://www.dcndigital.ae'})

        video = self._download_json(request, video_id)
        # Prefer the English title; the Arabic one is the mandatory fallback.
        title = video.get('title_en') or video['title_ar']

        # 'Q0M=' is the base64 encoding of 'CC'; how the server interprets the
        # countries/filter pair is not visible from this file.
        webpage = self._download_webpage(
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
            + compat_urllib_parse.urlencode({
                'id': video['id'],
                'user_id': video['user_id'],
                'signature': video['signature'],
                'countries': 'Q0M=',
                'filter': 'DENY',
            }), video_id)

        # The HLS manifest URL is mandatory (the search is fatal by default).
        m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

        # The RTSP link is optional: added as an extra format only if present.
        rtsp_url = self._search_regex(
            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        self._sort_formats(formats)

        img = video.get('img')
        # 'img' is joined onto the analytics base URL to form the thumbnail.
        thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
        duration = int_or_none(video.get('duration'))
        description = video.get('description_en') or video.get('description_ar')
        # Timestamps use a space rather than 'T' between date and time.
        timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
110
111
class DCNShowIE(InfoExtractor):
    """Extract a playlist from dcndigital.ae ``program/<show_id>`` or ``program/season/<season_id>`` URLs.

    Each playlist entry is a ``url``-type result that delegates the actual
    video extraction to ``DCNVideo``.
    """
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict':
        {
            'id': '205024',
            'title': 'محاضرات الشيخ الشعراوي',
            'description': '',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        """Return a playlist result for the show or season addressed by *url*.

        For a season URL the ``season_info`` endpoint is queried first and the
        season id is sent along with the show request.  The show title is
        mandatory (English preferred, Arabic as fallback); the description is
        optional.
        """
        show_id, season_id = re.match(self._VALID_URL, url).groups()
        data = {}
        if season_id:
            request = compat_urllib_request.Request(
                'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
                headers={'Origin': 'http://www.dcndigital.ae'})
            season = self._download_json(request, season_id)
            # NOTE(review): the season payload's 'id' is used as the show id;
            # confirm it really is the parent show's id.
            show_id = season['id']
            data['season'] = season_id
        data['show_id'] = show_id
        # urllib on Python 3 refuses a str POST body, so send bytes.  The
        # urlencoded form is percent-escaped and therefore pure ASCII.
        post_data = compat_urllib_parse.urlencode(data).encode('ascii')
        request = compat_urllib_request.Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            post_data,
            {
                'Origin': 'http://www.dcndigital.ae',
                'Content-Type': 'application/x-www-form-urlencoded'
            })
        show = self._download_json(request, show_id)
        category = show['cat']
        title = category.get('title_en') or category['title_ar']
        description = category.get('description_en') or category.get('description_ar')
        # One deferred entry per video, resolved later by the DCNVideo extractor.
        entries = [{
            'url': 'http://www.dcndigital.ae/#/media/%s' % video['id'],
            '_type': 'url',
            'ie_key': 'DCNVideo',
        } for video in show['videos']]
        return {
            'id': show_id,
            'title': title,
            'description': description,
            'entries': entries,
            '_type': 'playlist',
        }