X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffourtube.py;h=be4e81342df38c50c7144db1fcb092d1cdd48c59;hb=HEAD;hp=226ee67f072e3f783015e3fbc737ff08955d3b5a;hpb=b90b0c4ffa6f84a2ea5556d4df99de5f8ef2c7dd;p=youtube-dl diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index 226ee67f0..be4e81342 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -4,40 +4,49 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_request, + compat_b64decode, + compat_str, + compat_urllib_parse_unquote, + compat_urlparse, ) from ..utils import ( + int_or_none, parse_duration, parse_iso8601, + str_or_none, str_to_int, + try_get, + unified_timestamp, + url_or_none, ) -class FourTubeIE(InfoExtractor): - IE_NAME = '4tube' - _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P\d+)' +class FourTubeBaseIE(InfoExtractor): + def _extract_formats(self, url, video_id, media_id, sources): + token_url = 'https://%s/%s/desktop/%s' % ( + self._TKN_HOST, media_id, '+'.join(sources)) - _TEST = { - 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black', - 'md5': '6516c8ac63b03de06bc8eac14362db4f', - 'info_dict': { - 'id': '209733', - 'ext': 'mp4', - 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black', - 'uploader': 'WCP Club', - 'uploader_id': 'wcp-club', - 'upload_date': '20131031', - 'timestamp': 1383263892, - 'duration': 583, - 'view_count': int, - 'like_count': int, - 'categories': list, - 'age_limit': 18, - } - } + parsed_url = compat_urlparse.urlparse(url) + tokens = self._download_json(token_url, video_id, data=b'', headers={ + 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), + 'Referer': url, + }) + formats = [{ + 'url': tokens[format]['token'], + 'format_id': format + 'p', + 'resolution': format + 'p', + 'quality': int(format), + } for format in sources] + self._sort_formats(formats) + return formats def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') + + if kind == 'm' or not display_id: + url = self._URL_TEMPLATE % video_id + webpage = self._download_webpage(url, video_id) title = self._html_search_meta('name', webpage) @@ -45,12 +54,14 @@ class FourTubeIE(InfoExtractor): 'uploadDate', webpage)) thumbnail = self._html_search_meta('thumbnailUrl', webpage) uploader_id = self._html_search_regex( - r'', webpage, 'uploader id') + r'', + webpage, 'uploader id', fatal=False) uploader = self._html_search_regex( - r'', webpage, 'uploader') + r'', + webpage, 'uploader', fatal=False) categories_html = self._search_regex( - r'(?s)>\s*Categories / Tags\s*.*?', + r'(?s)>\s*Categories / Tags\s*.*?', webpage, 'categories', fatal=False) categories = None if categories_html: @@ -59,39 +70,33 @@ class FourTubeIE(InfoExtractor): r'(?s)
  • (.*?)', categories_html)] view_count = str_to_int(self._search_regex( - r'', - webpage, 'view count', fatal=False)) + r']+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">', + webpage, 'view count', default=None)) like_count = str_to_int(self._search_regex( - r'', - webpage, 'like count', fatal=False)) + r']+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">', + webpage, 'like count', default=None)) duration = parse_duration(self._html_search_meta('duration', webpage)) - player_url = self._search_regex(r'