X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpornhub.py;h=529f3f7119fd4e93a8c82168d910ab4fc3d1720e;hb=HEAD;hp=75ed69cde3db4c4116be3089f77f8f43f3205271;hpb=f41347260c2c2cf723bc2bb8a5c11f67a22175d5;p=youtube-dl diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 75ed69cde..529f3f711 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -17,6 +17,8 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + merge_dicts, + NO_DEFAULT, orderedSet, remove_quotes, str_to_int, @@ -51,20 +53,21 @@ class PornHubIE(PornHubBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:[^/]+\.)?(?Ppornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P[\da-z]+) ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', - 'md5': '1e19b41231a02eba417839222ac9d58e', + 'md5': 'a6391306d050e4547f62b3f485dd9ba9', 'info_dict': { 'id': '648719015', 'ext': 'mp4', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'uploader': 'Babes', 'upload_date': '20130628', + 'timestamp': 1372447216, 'duration': 361, 'view_count': int, 'like_count': int, @@ -81,8 +84,8 @@ class PornHubIE(PornHubBaseIE): 'id': '1331683002', 'ext': 'mp4', 'title': '重庆婷婷女王足交', - 'uploader': 'Unknown', 'upload_date': '20150213', + 'timestamp': 1423804862, 'duration': 1753, 'view_count': int, 'like_count': int, @@ -120,6 +123,7 @@ class PornHubIE(PornHubBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'This video has been disabled', }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, @@ -148,6 +152,9 @@ class PornHubIE(PornHubBaseIE): }, { 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', 'only_matching': True, + }, { + 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82', + 'only_matching': True, }] @staticmethod @@ -165,6 +172,13 @@ class PornHubIE(PornHubBaseIE): host = mobj.group('host') or 'pornhub.com' video_id = mobj.group('id') + if 'premium' in host: + if not self._downloader.params.get('cookiefile'): + raise ExtractorError( + 'PornHub Premium requires authentication.' + ' You may want to use --cookies.', + expected=True) + self._set_cookie(host, 'age_verified', '1') def dl_webpage(platform): @@ -188,10 +202,10 @@ class PornHubIE(PornHubBaseIE): # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. title = self._html_search_meta( - 'twitter:title', webpage, default=None) or self._search_regex( - (r']+class=["\']title["\'][^>]*>(?P[^<]+)', - r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', - r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), + 'twitter:title', webpage, default=None) or self._html_search_regex( + (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>', + r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1', + r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), webpage, 'title', group='title') video_urls = [] @@ -227,9 +241,9 @@ class PornHubIE(PornHubBaseIE): else: thumbnail, duration = [None] * 2 - def extract_js_vars(webpage, pattern, fatal=True): + def extract_js_vars(webpage, pattern, default=NO_DEFAULT): assignments = self._search_regex( - pattern, webpage, 'encoded url', fatal=fatal) + pattern, webpage, 'encoded url', default=default) if not assignments: return {} @@ -270,11 +284,15 @@ class PornHubIE(PornHubBaseIE): FORMAT_PREFIXES = ('media', 'quality') js_vars = extract_js_vars( webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), - fatal=False) + default=None) if js_vars: for key, format_url in js_vars.items(): if any(key.startswith(p) for p in FORMAT_PREFIXES): add_video_url(format_url) + if not video_urls and re.search( + r'<[^>]+\bid=["\']lockedPlayer', webpage): + raise ExtractorError( + 'Video %s is locked' % video_id, expected=True) if not video_urls: js_vars = extract_js_vars( @@ -323,10 +341,10 @@ class PornHubIE(PornHubBaseIE): video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', - webpage, 'uploader', fatal=False) + webpage, 'uploader', default=None) view_count = self._extract_count( - r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') + r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') like_count = self._extract_count( r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') dislike_count = self._extract_count( @@ -341,7 +359,11 @@ class PornHubIE(PornHubBaseIE): if div: return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) - return { + info = self._search_json_ld(webpage, video_id, default={}) + # description provided in JSON-LD is irrelevant + info['description'] = None + + return merge_dicts({ 'id': video_id, 'uploader': video_uploader, 'upload_date': upload_date, @@ -357,7 +379,7 @@ class PornHubIE(PornHubBaseIE): 'tags': extract_list('tags'), 'categories': extract_list('categories'), 'subtitles': subtitles, - } + }, info) class PornHubPlaylistBaseIE(PornHubBaseIE): @@ -400,7 +422,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -468,7 +490,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -583,7 +605,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _TESTS = [{ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'info_dict': {