X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpornhub.py;h=b8f65af7cbc15c566c01422ee6229f197af3015b;hb=43ebf77df3bbd93dbbd0336b0243d8d50895ab72;hp=ba0ad7da29d188f5e920376805bf7532a1613bee;hpb=4bf568d36cf516b38e4634e07bd8b4c3d33324f1;p=youtube-dl
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index ba0ad7da2..b8f65af7c 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -17,6 +17,7 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
+ NO_DEFAULT,
orderedSet,
remove_quotes,
str_to_int,
@@ -188,10 +189,10 @@ class PornHubIE(PornHubBaseIE):
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
title = self._html_search_meta(
- 'twitter:title', webpage, default=None) or self._search_regex(
- (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
- r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
- r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
+ 'twitter:title', webpage, default=None) or self._html_search_regex(
+ (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+ r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
webpage, 'title', group='title')
video_urls = []
@@ -227,12 +228,13 @@ class PornHubIE(PornHubBaseIE):
else:
thumbnail, duration = [None] * 2
- if not video_urls:
- tv_webpage = dl_webpage('tv')
-
+ def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
assignments = self._search_regex(
- r'(var.+?mediastring.+?)</script>', tv_webpage,
- 'encoded url').split(';')
+ pattern, webpage, 'encoded url', default=default)
+ if not assignments:
+ return {}
+
+ assignments = assignments.split(';')
js_vars = {}
@@ -254,11 +256,35 @@ class PornHubIE(PornHubBaseIE):
assn = re.sub(r'var\s+', '', assn)
vname, value = assn.split('=', 1)
js_vars[vname] = parse_js_value(value)
+ return js_vars
- video_url = js_vars['mediastring']
- if video_url not in video_urls_set:
- video_urls.append((video_url, None))
- video_urls_set.add(video_url)
+ def add_video_url(video_url):
+ v_url = url_or_none(video_url)
+ if not v_url:
+ return
+ if v_url in video_urls_set:
+ return
+ video_urls.append((v_url, None))
+ video_urls_set.add(v_url)
+
+ if not video_urls:
+ FORMAT_PREFIXES = ('media', 'quality')
+ js_vars = extract_js_vars(
+ webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
+ default=None)
+ if js_vars:
+ for key, format_url in js_vars.items():
+ if any(key.startswith(p) for p in FORMAT_PREFIXES):
+ add_video_url(format_url)
+ if not video_urls and re.search(
+ r'<[^>]+\bid=["\']lockedPlayer', webpage):
+ raise ExtractorError(
+ 'Video %s is locked' % video_id, expected=True)
+
+ if not video_urls:
+ js_vars = extract_js_vars(
+ dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
+ add_video_url(js_vars['mediastring'])
for mobj in re.finditer(
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
@@ -276,10 +302,16 @@ class PornHubIE(PornHubBaseIE):
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date:
upload_date = upload_date.replace('/', '')
- if determine_ext(video_url) == 'mpd':
+ ext = determine_ext(video_url)
+ if ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj: