X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fpornhub.py;h=3a27e37890dc78b26af866c9884807c97c56ccb9;hb=8a48223a7ba5a3d1e47a36c30dd925e11183ae63;hp=634142d0d27300eb82ea2f460fd2163a20208709;hpb=121c09c7be1ac2944f3432122104c1952bfd1f04;p=youtube-dl
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 634142d0d..3a27e3789 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -10,6 +10,7 @@ from ..compat import (
compat_urllib_request,
)
from ..utils import (
+ ExtractorError,
str_to_int,
)
from ..aes import (
@@ -44,9 +45,18 @@ class PornHubIE(InfoExtractor):
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
+ error_msg = self._html_search_regex(
+ r'(?s)
(.*?)
',
+ webpage, 'error message', default=None)
+ if error_msg:
+ error_msg = re.sub(r'\s+', ' ', error_msg)
+ raise ExtractorError(
+ 'PornHub said: %s' % error_msg,
+ expected=True, video_id=video_id)
+
video_title = self._html_search_regex(r']+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
- r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|]+>(.+?)<',
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
@@ -100,3 +110,33 @@ class PornHubIE(InfoExtractor):
'formats': formats,
'age_limit': 18,
}
+
+
+class PornHubPlaylistIE(InfoExtractor):
+    """Extractor for pornhub.com playlist pages.
+
+    Collects every ``view_video.php?viewkey=...`` link found on the playlist
+    page and hands each one to the ``PornHub`` extractor as a URL result.
+    """
+
+    # The named group was restored as ``id``; this assumes ``_match_id`` reads
+    # a group with that name (the test below expects an 'id' field) - confirm
+    # against InfoExtractor.
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/playlist/6201671',
+        'info_dict': {
+            'id': '6201671',
+            'title': 'P0p4',
+        },
+        'playlist_mincount': 35,
+    }]
+
+    def _real_extract(self, url):
+        """Build a playlist result from the playlist page at *url*.
+
+        Downloads the page, gathers the distinct video links in the order
+        they appear, and reads the title and description from the
+        ``playlistObject`` JSON embedded in the page.
+        """
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        # The same video may be linked more than once on the page. Drop the
+        # repeats but keep first-seen order: a bare set() would hand the
+        # entries back in arbitrary order.
+        seen_urls = set()
+        entries = []
+        for video_url in re.findall(
+                r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage):
+            if video_url in seen_urls:
+                continue
+            seen_urls.add(video_url)
+            entries.append(self.url_result(
+                'http://www.pornhub.com/%s' % video_url, 'PornHub'))
+
+        # Playlist metadata lives in an inline ``playlistObject = {...};``
+        # assignment on the page.
+        playlist = self._parse_json(
+            self._search_regex(
+                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
+            playlist_id)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist.get('title'), playlist.get('description'))