X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpornhub.py;fp=youtube_dl%2Fextractor%2Fpornhub.py;h=641083da7b4b0778a78e4f5af044202081a5af9c;hp=428324ef0b72366d4700372f826ad9c7c0fb9cb6;hb=5dda1edef93d94c9a49672f905df0c49c75c5739;hpb=d2d970d07ec82f648b62bff8b15ac0b57d0d0496 diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 428324ef0..641083da7 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -302,17 +302,12 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - def _get_items(class_name): + def extract_list(meta_key): div = self._search_regex( - r'
([\S\s]+?)
', - webpage, class_name, default=None) + r'(?s)]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)' + % meta_key, webpage, meta_key, default=None) if div: - return [a for a in re.findall(r']+>([^<]+)', div)] - else: - return None - - categories = _get_items('categoriesWrapper') - tags = _get_items('tagsWrapper') + return re.findall(r']+\bhref=[^>]+>([^<]+)', div) return { 'id': video_id, @@ -327,8 +322,8 @@ class PornHubIE(PornHubBaseIE): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, - 'tags': tags, - 'categories': categories, + 'tags': extract_list('tags'), + 'categories': extract_list('categories'), 'subtitles': subtitles, }