[shahid] try to bypass geo restriction and extract more metadata (closes #10062)
[youtube-dl] / youtube_dl / extractor / shahid.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     int_or_none,
8     parse_iso8601,
9     str_or_none,
10 )
11
12
class ShahidIE(InfoExtractor):
    """Extractor for episode pages on shahid.mbc.net.

    Both the stream URL and the episode metadata are read from the
    api.shahid.net JSON API, keyed by the numeric episode id in the URL.
    """

    _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
    _TESTS = [{
        'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
        'info_dict': {
            'id': '90574',
            'ext': 'mp4',
            'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
            'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
            'duration': 2972,
            'timestamp': 1422057420,
            'upload_date': '20150123',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # shahid plus subscriber only
        'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
        'only_matching': True
    }]

    def _call_api(self, path, video_id, note):
        """Query the Shahid JSON API and return the ``data`` member.

        ``path`` is appended to the API base URL; ``video_id`` and ``note``
        are handed to ``_download_json`` unchanged.

        Returns the ``data`` object of the response, or an empty dict when
        it is missing or null.

        Raises ExtractorError (flagged as expected) when ``data`` carries a
        non-empty ``error`` object; its values are reported one per line.
        """
        response = self._download_json(
            'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
                'apiKey': 'sh@hid0nlin3',
                'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
            })
        # dict.get's default only covers a *missing* key; an explicit JSON
        # null would come back as None and break the lookups below.
        data = response.get('data') or {}

        error = data.get('error')
        if error:
            # Stringify every value so a non-string entry (e.g. a numeric
            # code) cannot make the join fail and hide the API error.
            messages = '\n'.join(
                '%s' % message for message in error.values())
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, messages),
                expected=True)

        return data

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by ``url``.

        Two API calls are made: one for the player data (stream URL and DRM
        flag) and one for the episode metadata.

        Raises ExtractorError (flagged as expected) when the player data
        marks the video as DRM protected, or when the API reports an error.
        """
        video_id = self._match_id(url)

        player = self._call_api(
            'Content/Episode/%s' % video_id,
            video_id, 'Downloading player JSON')

        if player.get('drm'):
            raise ExtractorError('This video is DRM protected.', expected=True)

        formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
        self._sort_formats(formats)

        video = self._call_api(
            'episode/%s' % video_id, video_id,
            'Downloading video JSON')['episode']

        # The title is mandatory, so a missing key is allowed to fail loudly.
        title = video['title']

        # Genres are optional metadata: tolerate a missing key, an explicit
        # null, malformed entries and empty names instead of aborting.
        categories = [
            genre['name']
            for genre in (video.get('genres') or [])
            if isinstance(genre, dict) and genre.get('name')]

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('thumbnailUrl'),
            'duration': int_or_none(video.get('duration')),
            'timestamp': parse_iso8601(video.get('referenceDate')),
            'categories': categories,
            'series': video.get('showTitle') or video.get('showName'),
            'season': video.get('seasonTitle'),
            'season_number': int_or_none(video.get('seasonNumber')),
            'season_id': str_or_none(video.get('seasonId')),
            'episode_number': int_or_none(video.get('number')),
            'episode_id': video_id,
            'formats': formats,
        }