projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
1911d77
)
[pornhub:playlist] Improve extraction (closes #11594)
author
Sergey M․
<dstftw@gmail.com>
Tue, 3 Jan 2017 22:32:18 +0000
(
05:32
+0700)
committer
Sergey M․
<dstftw@gmail.com>
Tue, 3 Jan 2017 22:32:18 +0000
(
05:32
+0700)
youtube_dl/extractor/pornhub.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 40dbe6967fac2126b7bf6e6a1245768b3c039c8e..3eaf56973ec35072d8f0549c5850357ca94ed12b 100644
(file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -229,7 +229,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
webpage = self._download_webpage(url, playlist_id)
- entries = self._extract_entries(webpage)
+ # Only process container div with main playlist content skipping
+ # drop-down menu that uses similar pattern for videos (see
+ # https://github.com/rg3/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
+ entries = self._extract_entries(container)
playlist = self._parse_json(
self._search_regex(
playlist = self._parse_json(
self._search_regex(
@@ -243,12 +250,12 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.pornhub.com/playlist/6201671',
+ 'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
'info_dict': {
- 'id': '6201671',
- 'title': 'P0p4',
+ 'id': '4667351',
+ 'title': 'Nataly Hot',
},
},
- 'playlist_mincount': 35,
+ 'playlist_mincount': 2,
}]
}]