X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Ffourtube.py;h=ad273a0e70c3fbd9087779d33829b817d9d70127;hb=757984af90ce924f917ce9d940ebd120e95a4c4e;hp=226ee67f072e3f783015e3fbc737ff08955d3b5a;hpb=b90b0c4ffa6f84a2ea5556d4df99de5f8ef2c7dd;p=youtube-dl diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index 226ee67f0..ad273a0e7 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,9 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_request, -) +from ..compat import compat_urlparse from ..utils import ( parse_duration, parse_iso8601, @@ -13,31 +11,14 @@ from ..utils import ( ) -class FourTubeIE(InfoExtractor): - IE_NAME = '4tube' - _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P\d+)' +class FourTubeBaseIE(InfoExtractor): + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') - _TEST = { - 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', - 'md5': '6516c8ac63b03de06bc8eac14362db4f', - 'info_dict': { - 'id': '209733', - 'ext': 'mp4', - 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', - 'uploader': 'WCP Club', - 'uploader_id': 'wcp-club', - 'upload_date': '20131031', - 'timestamp': 1383263892, - 'duration': 583, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 18, - } - } + if kind == 'm' or not display_id: + url = self._URL_TEMPLATE % video_id - def _real_extract(self, url): - video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_meta('name', webpage) @@ -45,12 +26,14 @@ class FourTubeIE(InfoExtractor): 'uploadDate', webpage)) thumbnail = self._html_search_meta('thumbnailUrl', webpage) uploader_id = self._html_search_regex( - r'', webpage, 'uploader id') + r'', + webpage, 'uploader id', fatal=False) uploader = self._html_search_regex( - r'', webpage, 'uploader') + r'', + webpage, 'uploader', fatal=False) categories_html = self._search_regex( - r'(?s)>\s*Categories / Tags\s*.*?', + r'(?s)>\s*Categories / Tags\s*.*?', webpage, 'categories', fatal=False) categories = None if categories_html: @@ -59,32 +42,40 @@ class FourTubeIE(InfoExtractor): r'(?s)
  • (.*?)', categories_html)] view_count = str_to_int(self._search_regex( - r'', - webpage, 'view count', fatal=False)) + r']+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">', + webpage, 'view count', default=None)) like_count = str_to_int(self._search_regex( - r'', - webpage, 'like count', fatal=False)) + r']+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">', + webpage, 'like count', default=None)) duration = parse_duration(self._html_search_meta('duration', webpage)) - player_url = self._search_regex(r'