X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsmotri.py;h=99f5b19d2dd68e78aaf2f45f79fad60b8bb459dc;hb=26dca1661ebb43af53c785b69eddcdec9ac120d1;hp=f86ee8388c3bce34d7b92931dbbe185a0d7673df;hpb=55f6597c67dd04729dbc1b83d81bfbd63d7e9c0a;p=youtube-dl diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index f86ee8388..99f5b19d2 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,5 +1,6 @@ # encoding: utf-8 +import os.path import re import json import hashlib @@ -9,7 +10,8 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_parse, compat_urllib_request, - ExtractorError + ExtractorError, + url_basename, ) @@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor): # We will extract some from the video web page instead video_page_url = 'http://' + mobj.group('url') video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') - + + # Warning if video is unavailable + warning = self._html_search_regex( + r'
(.*?)
', video_page, + u'warning message', default=None) + if warning is not None: + self._downloader.report_warning( + u'Video %s may not be available; smotri said: %s ' % + (video_id, warning)) + # Adult content if re.search(u'EroConfirmText">', video_page) is not None: self.report_age_confirmation() @@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor): # Extract the rest of meta data video_title = self._search_meta(u'name', video_page, u'title') if not video_title: - video_title = video_url.rsplit('/', 1)[-1] + video_title = os.path.splitext(url_basename(video_url))[0] video_description = self._search_meta(u'description', video_page) END_TEXT = u' на сайте Smotri.com' - if video_description.endswith(END_TEXT): + if video_description and video_description.endswith(END_TEXT): video_description = video_description[:-len(END_TEXT)] START_TEXT = u'Смотреть онлайн ролик ' - if video_description.startswith(START_TEXT): + if video_description and video_description.startswith(START_TEXT): video_description = video_description[len(START_TEXT):] video_thumbnail = self._search_meta(u'thumbnail', video_page) upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date') - upload_date_m = re.search(r'(?P\d{4})\.(?P\d{2})\.(?P\d{2})T', upload_date_str) - video_upload_date = ( - ( - upload_date_m.group('year') + - upload_date_m.group('month') + - upload_date_m.group('day') + if upload_date_str: + upload_date_m = re.search(r'(?P\d{4})\.(?P\d{2})\.(?P\d{2})T', upload_date_str) + video_upload_date = ( + ( + upload_date_m.group('year') + + upload_date_m.group('month') + + upload_date_m.group('day') + ) + if upload_date_m else None ) - if upload_date_m else None - ) + else: + video_upload_date = None duration_str = self._search_meta(u'duration', video_page) - duration_m = re.search(r'T(?P[0-9]{2})H(?P[0-9]{2})M(?P[0-9]{2})S', duration_str) - video_duration = ( - ( - (int(duration_m.group('hours')) * 60 * 60) + - (int(duration_m.group('minutes')) * 60) + - int(duration_m.group('seconds')) + if duration_str: + duration_m = re.search(r'T(?P[0-9]{2})H(?P[0-9]{2})M(?P[0-9]{2})S', duration_str) + video_duration = ( + ( + (int(duration_m.group('hours')) * 60 * 60) + + (int(duration_m.group('minutes')) * 60) + + int(duration_m.group('seconds')) + ) + if duration_m else None ) - if duration_m else None - ) + else: + video_duration = None video_uploader = self._html_search_regex( u'
Автор.*?onmouseover="popup_user_info[^"]+">(.*?)', @@ -202,7 +219,7 @@ class SmotriIE(InfoExtractor): 'uploader': video_uploader, 'upload_date': video_upload_date, 'uploader_id': video_uploader_id, - 'video_duration': video_duration, + 'duration': video_duration, 'view_count': video_view_count, 'age_limit': 18 if adult_content else 0, 'video_page_url': video_page_url @@ -302,7 +319,7 @@ class SmotriBroadcastIE(InfoExtractor): adult_content = False ticket = self._html_search_regex( - u'window.broadcast_control.addFlashVar\\(\'file\', \'([^\']+)\'\\);', + u'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', broadcast_page, u'broadcast ticket') url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket @@ -353,4 +370,4 @@ class SmotriBroadcastIE(InfoExtractor): 'play_path': broadcast_playpath, 'rtmp_live': True, 'rtmp_conn': rtmp_conn - } \ No newline at end of file + }