projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[pornhub] Fix view count extraction (#26621) (refs #26614)
[youtube-dl]
/
youtube_dl
/
extractor
/
keezmovies.py
diff --git
a/youtube_dl/extractor/keezmovies.py
b/youtube_dl/extractor/keezmovies.py
index b002c0dd10684d006c775d0271d67f80fcba8a26..c3eb74c1742bedc23ae125384b9ba39965f6cd95 100644
(file)
--- a/
youtube_dl/extractor/keezmovies.py
+++ b/
youtube_dl/extractor/keezmovies.py
@@
-4,43
+4,42
@@
import re
from .common import InfoExtractor
from ..aes import aes_decrypt_text
from .common import InfoExtractor
from ..aes import aes_decrypt_text
-from ..compat import (
- compat_str,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
str_to_int,
strip_or_none,
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
str_to_int,
strip_or_none,
+ url_or_none,
)
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
_TESTS = [{
)
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
_TESTS = [{
- 'url': 'http
://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-121471
1',
- 'md5': '
1c1e75d22ffa53320f45eeb07bc4cdc0
',
+ 'url': 'http
s://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-1807068
1',
+ 'md5': '
2ac69cdb882055f71d82db4311732a1a
',
'info_dict': {
'info_dict': {
- 'id': '1
21471
1',
- 'display_id': '
petite-asian-lady-mai-playing-in-bathtub
',
+ 'id': '1
807068
1',
+ 'display_id': '
arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money
',
'ext': 'mp4',
'ext': 'mp4',
- 'title': '
Petite Asian Lady Mai Playing In Bathtub
',
- 'thumbnail':
're:^https?://.*\.jpg$'
,
+ 'title': '
Arab wife want it so bad I see she thirsty and has tiny money.
',
+ 'thumbnail':
None
,
'view_count': int,
'age_limit': 18,
}
}, {
'view_count': int,
'age_limit': 18,
}
}, {
- 'url': 'http://www.keezmovies.com/video/1
21471
1',
+ 'url': 'http://www.keezmovies.com/video/1
807068
1',
'only_matching': True,
}]
'only_matching': True,
}]
- def _extract_info(self, url):
+ def _extract_info(self, url
, fatal=True
):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- display_id = (mobj.group('display_id') if 'display_id'
- in mobj.groupdict() else None) or mobj.group('id')
+ display_id = (mobj.group('display_id')
+ if 'display_id' in mobj.groupdict()
+ else None) or mobj.group('id')
webpage = self._download_webpage(
url, display_id, headers={'Cookie': 'age_verified=1'})
webpage = self._download_webpage(
url, display_id, headers={'Cookie': 'age_verified=1'})
@@
-54,7
+53,8
@@
class KeezMoviesIE(InfoExtractor):
encrypted = False
def extract_format(format_url, height=None):
encrypted = False
def extract_format(format_url, height=None):
- if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
+ format_url = url_or_none(format_url)
+ if not format_url or not format_url.startswith(('http', '//')):
return
if format_url in format_urls:
return
return
if format_url in format_urls:
return
@@
-104,7
+104,11
@@
class KeezMoviesIE(InfoExtractor):
raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True)
raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True)
- self._sort_formats(formats)
+ try:
+ self._sort_formats(formats)
+ except ExtractorError:
+ if fatal:
+ raise
if not title:
title = self._html_search_regex(
if not title:
title = self._html_search_regex(
@@
-121,7
+125,9
@@
class KeezMoviesIE(InfoExtractor):
}
def _real_extract(self, url):
}
def _real_extract(self, url):
- webpage, info = self._extract_info(url)
+ webpage, info = self._extract_info(url, fatal=False)
+ if not info['formats']:
+ return self.url_result(url, 'Generic')
info['view_count'] = str_to_int(self._search_regex(
r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
return info
info['view_count'] = str_to_int(self._search_regex(
r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
return info