correct the extractor name and id and remove unnecessary request
[youtube-dl] / youtube_dl / extractor / dcn.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import (
8     compat_urllib_parse,
9     compat_urllib_request,
10 )
11 from ..utils import (
12     int_or_none,
13     parse_iso8601,
14     smuggle_url,
15     unsmuggle_url,
16 )
17
18
class DCNGeneralIE(InfoExtractor):
    # Dispatcher for dcndigital.ae "show" URLs: it performs no download of
    # its own and only hands off to DCNVideo or DCNSeason.
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        # Pull the three numeric ids out of the URL; video_id and season_id
        # are None when the optional trailing part is absent.
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('show_id')
        video_id = mobj.group('video_id')
        season_id = mobj.group('season_id')

        if video_id and int(video_id) > 0:
            # A positive video id wins: delegate to the single-video extractor.
            target_url = 'http://www.dcndigital.ae/#/media/%s' % video_id
            target_ie = 'DCNVideo'
        elif season_id and int(season_id) > 0:
            # Season URL; the show id travels along as smuggled data.
            target_url = smuggle_url(
                'http://www.dcndigital.ae/#/program/season/%s' % season_id,
                {'show_id': show_id})
            target_ie = 'DCNSeason'
        else:
            # Neither a video nor a season: fall back to the whole program.
            target_url = 'http://www.dcndigital.ae/#/program/%s' % show_id
            target_ie = 'DCNSeason'

        return {
            'url': target_url,
            '_type': 'url',
            'ie_key': target_ie,
        }
40
41
class DCNVideoIE(InfoExtractor):
    # Extractor for a single dcndigital.ae video. Metadata comes from a JSON
    # endpoint on admin.mangomolo.com; the stream URLs are scraped from the
    # embed page served by the same host.
    IE_NAME = 'dcn:video'
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'info_dict':
        {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            # Raw string: '\.' is not a valid escape sequence in a plain
            # string literal and triggers a warning on modern Python.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        # Returns an info dict with id, title, description, thumbnail,
        # duration, timestamp and formats for the video named in `url`.
        video_id = self._match_id(url)

        # The metadata request is sent with the site's Origin header.
        request = compat_urllib_request.Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
            headers={'Origin': 'http://www.dcndigital.ae'})

        video = self._download_json(request, video_id)
        # Prefer the English title; the Arabic one is mandatory as fallback.
        title = video.get('title_en') or video['title_ar']

        # The embed page is addressed with id/user_id/signature taken from
        # the metadata response.
        webpage = self._download_webpage(
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
            compat_urllib_parse.urlencode({
                'id': video['id'],
                'user_id': video['user_id'],
                'signature': video['signature'],
                'countries': 'Q0M=',
                'filter': 'DENY',
            }), video_id)

        m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

        # The RTSP link is optional: a missing one is not fatal.
        rtsp_url = self._search_regex(
            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        self._sort_formats(formats)

        # 'img' is joined onto the analytics base URL when present.
        img = video.get('img')
        thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
        duration = int_or_none(video.get('duration'))
        description = video.get('description_en') or video.get('description_ar')
        # Timestamps are parsed with a space as the date/time delimiter.
        timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
113
114
class DCNSeasonIE(InfoExtractor):
    # Playlist extractor for a dcndigital.ae program or one of its seasons.
    # Each playlist entry is a 'url' result handled by DCNVideo.
    IE_NAME = 'dcn:season'
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict':
        {
            'id': '7910',
            'title': 'محاضرات الشيخ الشعراوي',
            'description': '',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        # Returns a playlist dict (id, title, description, entries) for the
        # program or season named in `url`.
        url, smuggled_data = unsmuggle_url(url, {})
        show_id, season_id = re.match(self._VALID_URL, url).groups()
        data = {}
        if season_id:
            data['season'] = season_id
            # A season URL carries no show id: take it from smuggled data if
            # present, otherwise look it up through the season_info endpoint.
            show_id = smuggled_data.get('show_id')
            if show_id is None:
                request = compat_urllib_request.Request(
                    'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
                    headers={'Origin': 'http://www.dcndigital.ae'})
                season = self._download_json(request, season_id)
                show_id = season['id']
        data['show_id'] = show_id
        request = compat_urllib_request.Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            # POST data must be bytes on Python 3: urllib.request refuses a
            # text body with a TypeError, so encode the urlencoded form.
            compat_urllib_parse.urlencode(data).encode('utf-8'),
            {
                'Origin': 'http://www.dcndigital.ae',
                'Content-Type': 'application/x-www-form-urlencoded'
            })
        show = self._download_json(request, show_id)
        # Without an explicit season the show's default season is the id.
        season_id = season_id or show['default_season']
        category = show['cat']
        # Prefer English metadata; the Arabic title is mandatory as fallback.
        title = category.get('title_en') or category['title_ar']
        description = category.get('description_en') or category.get('description_ar')
        entries = [{
            'url': 'http://www.dcndigital.ae/#/media/%s' % video['id'],
            '_type': 'url',
            'ie_key': 'DCNVideo',
        } for video in show['videos']]
        return {
            'id': season_id,
            'title': title,
            'description': description,
            'entries': entries,
            '_type': 'playlist',
        }