projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
164fcbf
)
[xhamster] Extract categories (closes #11728)
author
Sergey M․
<dstftw@gmail.com>
Sun, 28 May 2017 00:50:15 +0000
(07:50 +0700)
committer
Sergey M․
<dstftw@gmail.com>
Sun, 28 May 2017 00:50:15 +0000
(07:50 +0700)
youtube_dl/extractor/xhamster.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 7b67037140653cacdc5740f7c560280335de0a0d..7cf0629e7ea2838f5a918854fa39ba075935db1f 100644
(file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import (
from .common import InfoExtractor
from ..utils import (
+ clean_html,
dict_get,
ExtractorError,
int_or_none,
dict_get,
ExtractorError,
int_or_none,
@@ -25,6 +26,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
+ 'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
},
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
},
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
@@ -36,6 +38,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
+ 'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
},
'params': {
'skip_download': True,
},
'params': {
'skip_download': True,
@@ -51,6 +54,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'parejafree',
'duration': 72,
'age_limit': 18,
'uploader': 'parejafree',
'duration': 72,
'age_limit': 18,
+ 'categories': ['Amateur', 'Blowjobs'],
},
'params': {
'skip_download': True,
},
'params': {
'skip_download': True,
@@ -152,6 +156,12 @@ class XHamsterIE(InfoExtractor):
self._sort_formats(formats)
self._sort_formats(formats)
+ categories_html = self._search_regex(
+ r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+ 'categories', default=None)
+ categories = [clean_html(category) for category in re.findall(
+ r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
+
return {
'id': video_id,
'title': title,
return {
'id': video_id,
'title': title,
@@ -165,6 +175,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(dislike_count),
'comment_count': int_or_none(comment_count),
'age_limit': age_limit,
'dislike_count': int_or_none(dislike_count),
'comment_count': int_or_none(comment_count),
'age_limit': age_limit,
+ 'categories': categories,
'formats': formats,
}
'formats': formats,
}