X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsmotri.py;h=99f5b19d2dd68e78aaf2f45f79fad60b8bb459dc;hb=08d13955dd56027d8ca61b70c6cbab6925367fb4;hp=a589a893bea8b6c1f32f59d3c7d2b1edf24af740;hpb=1fb8f092736a64a375134e92db1609a3be9fc768;p=youtube-dl diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index a589a893b..99f5b19d2 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,5 +1,6 @@ # encoding: utf-8 +import os.path import re import json import hashlib @@ -10,6 +11,7 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, ExtractorError, + url_basename, ) @@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor): # We will extract some from the video web page instead video_page_url = 'http://' + mobj.group('url') video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') - + + # Warning if video is unavailable + warning = self._html_search_regex( + r'
(.*?)
', video_page, + u'warning message', default=None) + if warning is not None: + self._downloader.report_warning( + u'Video %s may not be available; smotri said: %s ' % + (video_id, warning)) + # Adult content if re.search(u'EroConfirmText">', video_page) is not None: self.report_age_confirmation() @@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor): # Extract the rest of meta data video_title = self._search_meta(u'name', video_page, u'title') if not video_title: - video_title = video_url.rsplit('/', 1)[-1] + video_title = os.path.splitext(url_basename(video_url))[0] video_description = self._search_meta(u'description', video_page) END_TEXT = u' на сайте Smotri.com' - if video_description.endswith(END_TEXT): + if video_description and video_description.endswith(END_TEXT): video_description = video_description[:-len(END_TEXT)] START_TEXT = u'Смотреть онлайн ролик ' - if video_description.startswith(START_TEXT): + if video_description and video_description.startswith(START_TEXT): video_description = video_description[len(START_TEXT):] video_thumbnail = self._search_meta(u'thumbnail', video_page) upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date') - upload_date_m = re.search(r'(?P\d{4})\.(?P\d{2})\.(?P\d{2})T', upload_date_str) - video_upload_date = ( - ( - upload_date_m.group('year') + - upload_date_m.group('month') + - upload_date_m.group('day') + if upload_date_str: + upload_date_m = re.search(r'(?P\d{4})\.(?P\d{2})\.(?P\d{2})T', upload_date_str) + video_upload_date = ( + ( + upload_date_m.group('year') + + upload_date_m.group('month') + + upload_date_m.group('day') + ) + if upload_date_m else None ) - if upload_date_m else None - ) + else: + video_upload_date = None duration_str = self._search_meta(u'duration', video_page) - duration_m = re.search(r'T(?P[0-9]{2})H(?P[0-9]{2})M(?P[0-9]{2})S', duration_str) - video_duration = ( - ( - (int(duration_m.group('hours')) * 60 * 60) + - (int(duration_m.group('minutes')) * 60) + - int(duration_m.group('seconds')) + if duration_str: + duration_m = re.search(r'T(?P[0-9]{2})H(?P[0-9]{2})M(?P[0-9]{2})S', duration_str) + video_duration = ( + ( + (int(duration_m.group('hours')) * 60 * 60) + + (int(duration_m.group('minutes')) * 60) + + int(duration_m.group('seconds')) + ) + if duration_m else None ) - if duration_m else None - ) + else: + video_duration = None video_uploader = self._html_search_regex( u'
Автор.*?onmouseover="popup_user_info[^"]+">(.*?)',