X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsmotri.py;h=24746a09a0c2183e8a0bd8e239cb59291b41f19a;hb=34e7dc81a94d39d48c5b4aac8cddcca46edba94d;hp=9d2f8d40beb0ce16705e138371401f3104493eb8;hpb=5f6a1245ffa9276c1af59b0835afeef67e2fb5b1;p=youtube-dl diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 9d2f8d40b..24746a09a 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -7,9 +7,11 @@ import hashlib import uuid from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, int_or_none, unified_strdate, @@ -67,6 +69,7 @@ class SmotriIE(InfoExtractor): 'params': { 'videopassword': 'qwerty', }, + 'skip': 'Video is not approved by moderator', }, # age limit + video-password { @@ -84,12 +87,28 @@ class SmotriIE(InfoExtractor): }, 'params': { 'videopassword': '333' - } + }, + 'skip': 'Video is not approved by moderator', + }, + # not approved by moderator, but available + { + 'url': 'http://smotri.com/video/view/?id=v28888533b73', + 'md5': 'f44bc7adac90af518ef1ecf04893bb34', + 'info_dict': { + 'id': 'v28888533b73', + 'ext': 'mp4', + 'title': 'Russian Spies Killed By ISIL Child Soldier', + 'uploader': 'Mopeder', + 'uploader_id': 'mopeder', + 'duration': 71, + 'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg', + 'upload_date': '20150114', + }, }, # swf player { 'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500', - 'md5': '4d47034979d9390d14acdf59c4935bc2', + 'md5': '31099eeb4bc906712c5f40092045108d', 'info_dict': { 'id': 'v9188090500', 'ext': 'mp4', @@ -120,9 +139,6 @@ class SmotriIE(InfoExtractor): def _search_meta(self, name, html, display_name=None): if display_name is None: display_name = name - return self._html_search_regex( - r'' % re.escape(name), - html, display_name, fatal=False) return self._html_search_meta(name, html, 
display_name) def _real_extract(self, url): @@ -142,13 +158,16 @@ class SmotriIE(InfoExtractor): video = self._download_json(request, video_id, 'Downloading video JSON') - if video.get('_moderate_no') or not video.get('moderated'): - raise ExtractorError('Video %s has not been approved by moderator' % video_id, expected=True) + video_url = video.get('_vidURL') or video.get('_vidURL_mp4') - if video.get('error'): - raise ExtractorError('Video %s does not exist' % video_id, expected=True) + if not video_url: + if video.get('_moderate_no') or not video.get('moderated'): + raise ExtractorError( + 'Video %s has not been approved by moderator' % video_id, expected=True) + + if video.get('error'): + raise ExtractorError('Video %s does not exist' % video_id, expected=True) - video_url = video.get('_vidURL') or video.get('_vidURL_mp4') title = video['title'] thumbnail = video['_imgURL'] upload_date = unified_strdate(video['added']) @@ -184,7 +203,7 @@ class SmotriIE(InfoExtractor): view_count = self._html_search_regex( 'Общее количество просмотров.*?(\\d+)', - webpage, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL) + webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL) return { 'id': video_id, @@ -274,15 +293,18 @@ class SmotriBroadcastIE(InfoExtractor): broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') if re.search('>Режиссер с логином
"%s"
не существует<' % broadcast_id, broadcast_page) is not None: - raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) + raise ExtractorError( + 'Broadcast %s does not exist' % broadcast_id, expected=True) # Adult content if re.search('EroConfirmText">', broadcast_page) is not None: (username, password) = self._get_login_info() if username is None: - raise ExtractorError('Erotic broadcasts allowed only for registered users, ' - 'use --username and --password options to provide account credentials.', expected=True) + raise ExtractorError( + 'Erotic broadcasts allowed only for registered users, ' + 'use --username and --password options to provide account credentials.', + expected=True) login_form = { 'login-hint53': '1', @@ -291,9 +313,11 @@ class SmotriBroadcastIE(InfoExtractor): 'password': password, } - request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) + request = compat_urllib_request.Request( + broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') - broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') + broadcast_page = self._download_webpage( + request, broadcast_id, 'Logging in and confirming age') if re.search('>Неверный логин или пароль<', broadcast_page) is not None: raise ExtractorError('Unable to log in: bad username or password', expected=True) @@ -303,7 +327,7 @@ class SmotriBroadcastIE(InfoExtractor): adult_content = False ticket = self._html_search_regex( - 'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', + r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)", broadcast_page, 'broadcast ticket') url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket @@ -312,26 +336,31 @@ class SmotriBroadcastIE(InfoExtractor): if broadcast_password: url += '&pass=%s' % 
hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() - broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') + broadcast_json_page = self._download_webpage( + url, broadcast_id, 'Downloading broadcast JSON') try: broadcast_json = json.loads(broadcast_json_page) protected_broadcast = broadcast_json['_pass_protected'] == 1 if protected_broadcast and not broadcast_password: - raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) + raise ExtractorError( + 'This broadcast is protected by a password, use the --video-password option', + expected=True) broadcast_offline = broadcast_json['is_play'] == 0 if broadcast_offline: raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True) rtmp_url = broadcast_json['_server'] - if not rtmp_url.startswith('rtmp://'): + mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url) + if not mobj: raise ExtractorError('Unexpected broadcast rtmp URL') broadcast_playpath = broadcast_json['_streamName'] + broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL']) broadcast_thumbnail = broadcast_json['_imgURL'] - broadcast_title = broadcast_json['title'] + broadcast_title = self._live_title(broadcast_json['title']) broadcast_description = broadcast_json['description'] broadcaster_nick = broadcast_json['nick'] broadcaster_login = broadcast_json['login'] @@ -352,6 +381,9 @@ class SmotriBroadcastIE(InfoExtractor): 'age_limit': 18 if adult_content else 0, 'ext': 'flv', 'play_path': broadcast_playpath, + 'player_url': 'http://pics.smotri.com/broadcast_play.swf', + 'app': broadcast_app, 'rtmp_live': True, - 'rtmp_conn': rtmp_conn + 'rtmp_conn': rtmp_conn, + 'is_live': True, }