X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fplayvid.py;h=4aef186ea22b4dab1be50a0bdd6dbcbbcae1e2b1;hb=HEAD;hp=a74f8f675b660855967c3498197da9e6aae65a85;hpb=777ac90791e6f105c4c617d22fac404cb316c4f9;p=youtube-dl diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index a74f8f675..4aef186ea 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -3,32 +3,49 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, +) from ..utils import ( - compat_urllib_parse, - determine_ext, + clean_html, + ExtractorError, ) -class PlayvidIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P.+?)(#|$)' - _TEST = { - 'url': 'http://www.playvid.com/watch/agbDDi7WZTV', - 'file': 'agbDDi7WZTV.mp4', - 'md5': '44930f8afa616efdf9482daf4fe53e1e', +class PlayvidIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P.+?)(?:#|$)' + _TESTS = [{ + 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', + 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', 'info_dict': { - 'title': 'Michelle Lewin in Miami Beach', - 'duration': 240, + 'id': 'RnmBNgtrrJu', + 'ext': 'mp4', + 'title': 'md5:9256d01c6317e3f703848b5906880dc8', + 'duration': 82, 'age_limit': 18, - } - } + }, + 'skip': 'Video removed due to ToS', + }, { + 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH', + 'md5': '39d49df503ad7b8f23a4432cbf046477', + 'info_dict': { + 'id': 'hwb0GpNkzgH', + 'ext': 'mp4', + 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', + 'age_limit': 18, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) + m_error = re.search( + r'
\s*
\s*
(?P.+?)
\s*
', webpage) + if m_error: + raise ExtractorError(clean_html(m_error.group('msg')), expected=True) video_title = None duration = None @@ -36,38 +53,35 @@ class PlayvidIE(InfoExtractor): formats = [] # most of the information is stored in the flashvars - flashvars_match = re.search(r'flashvars="(.+?)"',webpage) - - if flashvars_match: - infos = compat_urllib_parse.unquote(flashvars_match.group(1)).split(r'&') - for info in infos: - videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$',info) - if videovars_match: - key = videovars_match.group(1) - val = videovars_match.group(2) + flashvars = self._html_search_regex( + r'flashvars="(.+?)"', webpage, 'flashvars') - if key == 'title': - video_title = val.replace('+',' ') - if key == 'duration': - try: - duration = val - except ValueError: - duration = None - if key == 'big_thumb': - video_thumbnail = val + infos = compat_urllib_parse_unquote(flashvars).split(r'&') + for info in infos: + videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) + if videovars_match: + key = videovars_match.group(1) + val = videovars_match.group(2) - videourl_match = re.match(r'^video_urls\]\[(?P\d+)p',key) - if videourl_match: - resolution = int(videourl_match.group('resolution')) - formats.append({ - 'resolution': resolution, # 360, 480, ... - 'ext': determine_ext(val), - 'url': val - }) + if key == 'title': + video_title = compat_urllib_parse_unquote_plus(val) + if key == 'duration': + try: + duration = int(val) + except ValueError: + pass + if key == 'big_thumb': + video_thumbnail = val - # fatal error, if no download url is found - if len(formats) == 0: - raise ExtractorError,'no video url found' + videourl_match = re.match( + r'^video_urls\]\[(?P[0-9]+)p', key) + if videourl_match: + height = int(videourl_match.group('resolution')) + formats.append({ + 'height': height, + 'url': val, + }) + self._sort_formats(formats) # Extract title - should be in the flashvars; if not, look elsewhere if video_title is None: