From: Remita Amine Date: Tue, 12 Jul 2016 22:15:38 +0000 (+0100) Subject: [shahid] try to bypass geo restriction and extract more metadata(closes #10062) X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=41aa44259d3a0791b1a023a18c9a933f71e04c50;p=youtube-dl [shahid] try to bypass geo restriction and extract more metadata(closes #10062) --- diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index d95ea06be..ca286abb1 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, parse_iso8601, + str_or_none, ) @@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor): 'only_matching': True }] - def _handle_error(self, response): - if not isinstance(response, dict): - return - error = response.get('error') + def _call_api(self, path, video_id, note): + data = self._download_json( + 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={ + 'apiKey': 'sh@hid0nlin3', + 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', + }).get('data', {}) + + error = data.get('error') if error: raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), expected=True) - def _download_json(self, url, video_id, note='Downloading JSON metadata'): - response = super(ShahidIE, self)._download_json(url, video_id, note)['data'] - self._handle_error(response) - return response + return data def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - api_vars = { - 'id': video_id, - 'type': 'player', - 'url': 'http://api.shahid.net/api/v1_1', - 'playerType': 'episode', - } - - flashvars = self._search_regex( - r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None) - if flashvars: - for key in api_vars.keys(): - value = 
self._search_regex( - r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key, - flashvars, 'type', default=None, group='value') - if value: - api_vars[key] = value - - player = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html' - % (video_id, api_vars['type']), video_id, 'Downloading player JSON') + player = self._call_api( + 'Content/Episode/%s' % video_id, + video_id, 'Downloading player JSON') if player.get('drm'): raise ExtractorError('This video is DRM protected.', expected=True) @@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') self._sort_formats(formats) - video = self._download_json( - '%s/%s/%s?%s' % ( - api_vars['url'], api_vars['playerType'], api_vars['id'], - compat_urllib_parse_urlencode({ - 'apiKey': 'sh@hid0nlin3', - 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', - })), - video_id, 'Downloading video JSON') - - video = video[api_vars['playerType']] + video = self._call_api( + 'episode/%s' % video_id, video_id, + 'Downloading video JSON')['episode'] title = video['title'] - description = video.get('description') - thumbnail = video.get('thumbnailUrl') - duration = int_or_none(video.get('duration')) - timestamp = parse_iso8601(video.get('referenceDate')) categories = [ category['name'] for category in video.get('genres', []) if 'name' in category] @@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, + 'description': video.get('description'), + 'thumbnail': video.get('thumbnailUrl'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('referenceDate')), 'categories': categories, + 'series': video.get('showTitle') or video.get('showName'), + 'season': video.get('seasonTitle'), + 'season_number': int_or_none(video.get('seasonNumber')), + 'season_id': 
str_or_none(video.get('seasonId')), + 'episode_number': int_or_none(video.get('number')), + 'episode_id': video_id, 'formats': formats, }