[pornhub] Apply scrape detection bypass for all extractors

[youtube-dl] / youtube_dl / extractor / pornhub.py
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

index e377de19664524844699e4046d3ebe48fcbc0167..be93d5d4832f35815044028ed9c840b719e67213 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -10,7 +10,9 @@ from .common import InfoExtractor
  from ..compat import (
      compat_HTTPError,
      compat_str,
+    compat_urllib_request,
  )
+from .openload import PhantomJSwrapper
  from ..utils import (
      ExtractorError,
      int_or_none,
@@ -22,7 +24,50 @@ from ..utils import (
  )
  
  
-class PornHubIE(InfoExtractor):
+class PornHubBaseIE(InfoExtractor):
+    """Base class that retries downloads answered with a scrape-detection page."""
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        """Download a page, retrying once if a scrape-detection page comes back.
+
+        Arguments are forwarded unchanged to the parent implementation.
+        If the response matches one of the marker patterns below, the URL
+        and the received HTML are handed to PhantomJSwrapper.get() and the
+        download is then issued a second time.
+
+        Returns the parent's result: normally a (webpage, urlh) pair. A
+        falsy result is returned as is (presumably the parent's non-fatal
+        failure value - confirm against InfoExtractor).
+        """
+        def dl(*args, **kwargs):
+            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+        ret = dl(*args, **kwargs)
+
+        # A falsy result cannot be unpacked into (webpage, urlh); hand it
+        # back to the caller untouched instead of raising TypeError here.
+        if not ret:
+            return ret
+
+        webpage, urlh = ret
+
+        if any(re.search(p, webpage) for p in (
+                r'<body\b[^>]+\bonload=["\']go\(\)',
+                r'document\.cookie\s*=\s*["\']RNKEY=',
+                r'document\.location\.reload\(true\)')):
+            url_or_request = args[0]
+            url = (url_or_request.get_full_url()
+                   if isinstance(url_or_request, compat_urllib_request.Request)
+                   else url_or_request)
+            phantom = PhantomJSwrapper(self, required_version='2.0')
+            phantom.get(url, html=webpage)
+            # The retry may fail as well, so return its result without unpacking.
+            return dl(*args, **kwargs)
+
+        return webpage, urlh
+
+
+class PornHubIE(PornHubBaseIE):
      IE_DESC = 'PornHub and Thumbzilla'
      _VALID_URL = r'''(?x)
                      https?://
@@ -307,7 +331,7 @@ class PornHubIE(InfoExtractor):
          }
  
  
-class PornHubPlaylistBaseIE(InfoExtractor):
+class PornHubPlaylistBaseIE(PornHubBaseIE):
      def _extract_entries(self, webpage, host):
          # Only process container div with main playlist content skipping
          # drop-down menu that uses similar pattern for videos (see