[pornhub] Fix view count extraction (#26621) (refs #26614)

[youtube-dl] / youtube_dl / extractor / keezmovies.py
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py

index b002c0dd10684d006c775d0271d67f80fcba8a26..c3eb74c1742bedc23ae125384b9ba39965f6cd95 100644 (file)
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -4,43 +4,42 @@ import re
  
  from .common import InfoExtractor
  from ..aes import aes_decrypt_text
-from ..compat import (
-    compat_str,
-    compat_urllib_parse_unquote,
-)
+from ..compat import compat_urllib_parse_unquote
  from ..utils import (
      determine_ext,
      ExtractorError,
      int_or_none,
      str_to_int,
      strip_or_none,
+    url_or_none,
  )
  
  
  class KeezMoviesIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
      _TESTS = [{
-        'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
-        'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
+        'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681',
+        'md5': '2ac69cdb882055f71d82db4311732a1a',
          'info_dict': {
-            'id': '1214711',
-            'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
+            'id': '18070681',
+            'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money',
              'ext': 'mp4',
-            'title': 'Petite Asian Lady Mai Playing In Bathtub',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'title': 'Arab wife want it so bad I see she thirsty and has tiny money.',
+            'thumbnail': None,
              'view_count': int,
              'age_limit': 18,
          }
      }, {
-        'url': 'http://www.keezmovies.com/video/1214711',
+        'url': 'http://www.keezmovies.com/video/18070681',
          'only_matching': True,
      }]
  
-    def _extract_info(self, url):
+    def _extract_info(self, url, fatal=True):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
-        display_id = (mobj.group('display_id') if 'display_id'
-            in mobj.groupdict() else None) or mobj.group('id')
+        display_id = (mobj.group('display_id')
+                      if 'display_id' in mobj.groupdict()
+                      else None) or mobj.group('id')
  
          webpage = self._download_webpage(
              url, display_id, headers={'Cookie': 'age_verified=1'})
@@ -54,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
          encrypted = False
  
          def extract_format(format_url, height=None):
-            if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
+            format_url = url_or_none(format_url)
+            if not format_url or not format_url.startswith(('http', '//')):
                  return
              if format_url in format_urls:
                  return
@@ -104,7 +104,11 @@ class KeezMoviesIE(InfoExtractor):
                  raise ExtractorError(
                      'Video %s is no longer available' % video_id, expected=True)
  
-        self._sort_formats(formats)
+        try:
+            self._sort_formats(formats)
+        except ExtractorError:
+            if fatal:
+                raise
  
          if not title:
              title = self._html_search_regex(
@@ -121,7 +125,9 @@ class KeezMoviesIE(InfoExtractor):
          }
  
      def _real_extract(self, url):
-        webpage, info = self._extract_info(url)
+        webpage, info = self._extract_info(url, fatal=False)
+        if not info['formats']:
+            return self.url_result(url, 'Generic')
          info['view_count'] = str_to_int(self._search_regex(
              r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
          return info