X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcloudy.py;h=85ca20eccd0c5bb2e8e39468d6ad62428d918059;hb=HEAD;hp=386f080d241d19b30b131d2700a25ddf8de0ecc8;hpb=2eebf060af9fe284cbcb839886b27030553fb48d;p=youtube-dl diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 386f080d2..85ca20ecc 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -1,108 +1,60 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - ExtractorError, - compat_parse_qs, - compat_urllib_parse, - remove_end, - HEADRequest, - compat_HTTPError, + str_to_int, + unified_strdate, ) class CloudyIE(InfoExtractor): - _IE_DESC = 'cloudy.ec and videoraj.ch' - _VALID_URL = r'''(?x) - https?://(?:www\.)?(?Pcloudy\.ec|videoraj\.ch)/ - (?:v/|embed\.php\?id=) - (?P[A-Za-z0-9]+) - ''' - _EMBED_URL = 'http://www.%s/embed.php?id=%s' - _API_URL = 'http://www.%s/api/player.api.php?%s' - _MAX_TRIES = 2 - _TESTS = [ - { - 'url': 'https://www.cloudy.ec/v/af511e2527aac', - 'md5': '5cb253ace826a42f35b4740539bedf07', - 'info_dict': { - 'id': 'af511e2527aac', - 'ext': 'flv', - 'title': 'Funny Cats and Animals Compilation june 2013', - } - }, - { - 'url': 'http://www.videoraj.ch/v/47f399fd8bb60', - 'md5': '7d0f8799d91efd4eda26587421c3c3b0', - 'info_dict': { - 'id': '47f399fd8bb60', - 'ext': 'flv', - 'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?', - } + _IE_DESC = 'cloudy.ec' + _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '29832b05028ead1b58be86bf319397ca', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'mp4', + 'title': 'Funny Cats and Animals Compilation june 2013', + 'upload_date': '20130913', + 'view_count': int, } - ] - - def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0): + }, { + 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', + 'only_matching': True, + }] - if try_num > self._MAX_TRIES - 1: - raise ExtractorError('Unable to extract video URL', expected=True) - - form = { - 'file': video_id, - 'key': file_key, - } + def _real_extract(self, url): + video_id = self._match_id(url) - if error_url: - form.update({ - 'numOfErrors': try_num, - 'errorCode': '404', - 'errorUrl': error_url, + webpage = self._download_webpage( + 'https://www.cloudy.ec/embed.php', video_id, query={ + 'id': video_id, + 'playerPage': 1, + 'autoplay': 1, }) - data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form)) - player_data = self._download_webpage( - data_url, video_id, 'Downloading player data') - data = compat_parse_qs(player_data) - - try_num += 1 - - if 'error' in data: - raise ExtractorError( - '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), - expected=True) + info = self._parse_html5_media_entries(url, webpage, video_id)[0] - title = data.get('title', [None])[0] - if title: - title = remove_end(title, '&asdasdas').strip() + webpage = self._download_webpage( + 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) - video_url = data.get('url', [None])[0] - - if video_url: - try: - self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL') - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: - self.report_warning('Invalid video URL, requesting another', video_id) - return self._extract_video(video_host, video_id, file_key, video_url, try_num) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_host = mobj.group('host') - video_id = mobj.group('id') + if webpage: + info.update({ + 'title': self._search_regex( + r']*>([^<]+)<', webpage, 'title'), + 'upload_date': unified_strdate(self._search_regex( + r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, + 'upload date', fatal=False)), + 'view_count': str_to_int(self._search_regex( + r'([\d,.]+) views<', webpage, 'view count', fatal=False)), + }) - url = self._EMBED_URL % (video_host, video_id) - webpage = self._download_webpage(url, video_id) + if not info.get('title'): + info['title'] = video_id - file_key = self._search_regex( - r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') + info['id'] = video_id - return self._extract_video(video_host, video_id, file_key) + return info