projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Start moving to ytdl-org
[youtube-dl]
/
youtube_dl
/
extractor
/
pornhub.py
diff --git
a/youtube_dl/extractor/pornhub.py
b/youtube_dl/extractor/pornhub.py
index 428324ef0b72366d4700372f826ad9c7c0fb9cb6..3a474c17970bbd087de187bf6059e9900551f61f 100644
(file)
--- a/
youtube_dl/extractor/pornhub.py
+++ b/
youtube_dl/extractor/pornhub.py
@@
-302,17
+302,12
@@
class PornHubIE(PornHubBaseIE):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
- def
_get_items(class_name
):
+ def
extract_list(meta_key
):
div = self._search_regex(
div = self._search_regex(
- r'
<div class="' + class_name + '">([\S\s]+?)</div>',
-
webpage, class_name
, default=None)
+ r'
(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+
% meta_key, webpage, meta_key
, default=None)
if div:
if div:
- return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)]
- else:
- return None
-
- categories = _get_items('categoriesWrapper')
- tags = _get_items('tagsWrapper')
+ return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
return {
'id': video_id,
return {
'id': video_id,
@@
-327,8
+322,8
@@
class PornHubIE(PornHubBaseIE):
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
- 'tags':
tags
,
- 'categories':
categories
,
+ 'tags':
extract_list('tags')
,
+ 'categories':
extract_list('categories')
,
'subtitles': subtitles,
}
'subtitles': subtitles,
}
@@
-337,7
+332,7
@@
class PornHubPlaylistBaseIE(PornHubBaseIE):
def _extract_entries(self, webpage, host):
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
def _extract_entries(self, webpage, host):
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
- # https://github.com/
rg3
/youtube-dl/issues/11594).
+ # https://github.com/
ytdl-org
/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)