X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpornhub.py;h=a7099bbbffe85e09659ec79acd2c384746c18e60;hb=a7298f3e99c8373d8c60be42f00d3b978d9b176a;hp=e032817f2c18634561dda71f786782373c0b38f8;hpb=9edcdac90cc871e6d6e497dc0fb099e40b99ff02;p=youtube-dl
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index e032817f2..a7099bbbf 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -114,13 +114,14 @@ class PornHubIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+
+ self._set_cookie('pornhub.com', 'age_verified', '1')
def dl_webpage(platform):
+ self._set_cookie('pornhub.com', 'platform', platform)
return self._download_webpage(
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
- video_id, headers={
- 'Cookie': 'age_verified=1; platform=%s' % platform,
- })
+ video_id)
webpage = dl_webpage('pc')
@@ -186,7 +187,7 @@ class PornHubIE(InfoExtractor):
title, thumbnail, duration = [None] * 3
video_uploader = self._html_search_regex(
- r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
+ r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
view_count = self._extract_count(
@@ -227,13 +228,20 @@ class PornHubIE(InfoExtractor):
class PornHubPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, webpage):
+ # Only process container div with main playlist content skipping
+ # drop-down menu that uses similar pattern for videos (see
+ # https://github.com/rg3/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(
]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
return [
self.url_result(
'http://www.pornhub.com/%s' % video_url,
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
- webpage))
+ container))
]
def _real_extract(self, url):
@@ -241,14 +249,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
- # Only process container div with main playlist content skipping
- # drop-down menu that uses similar pattern for videos (see
- # https://github.com/rg3/youtube-dl/issues/11594).
- container = self._search_regex(
- r'(?s)(
]+class=["\']container.+)', webpage,
- 'container', default=webpage)
-
- entries = self._extract_entries(container)
+ entries = self._extract_entries(webpage)
playlist = self._parse_json(
self._search_regex(
@@ -275,7 +276,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P[^/]+)/videos'
+ _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:user|channel)s/(?P[^/]+)/videos'
_TESTS = [{
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'info_dict': {
@@ -285,6 +286,25 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
}, {
'url': 'http://www.pornhub.com/users/rushandlia/videos',
'only_matching': True,
+ }, {
+ # default sorting as Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos',
+ 'info_dict': {
+ 'id': 'povd',
+ },
+ 'playlist_mincount': 293,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
+ 'only_matching': True,
+ }, {
+ # Most Recent Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
+ 'only_matching': True,
}]
def _real_extract(self, url):