[extractor/generic] Improve support for pornhub embeds (closes #11100)

author Sergey M․ <dstftw@gmail.com>

Sun, 6 Nov 2016 14:52:00 +0000 (21:52 +0700)

committer Sergey M․ <dstftw@gmail.com>

Sun, 6 Nov 2016 14:52:00 +0000 (21:52 +0700)
author Sergey M․ <dstftw@gmail.com>
Sun, 6 Nov 2016 14:52:00 +0000 (21:52 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sun, 6 Nov 2016 14:52:00 +0000 (21:52 +0700)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index b1315a9c804da98eb925dfd1c66d0564e606a5f2..bde65fa270fb399140e85ac63395060bd7007d2e 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1983,11 +1983,6 @@ class GenericIE(InfoExtractor):
          if sportbox_urls:
              return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
  
-        # Look for embedded PornHub player
-        pornhub_url = PornHubIE._extract_url(webpage)
-        if pornhub_url:
-            return self.url_result(pornhub_url, 'PornHub')
-
          # Look for embedded XHamster player
          xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
          if xhamster_urls:
@@ -1998,6 +1993,11 @@ class GenericIE(InfoExtractor):
          if tnaflix_urls:
              return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
  
+        # Look for embedded PornHub player
+        pornhub_urls = PornHubIE._extract_urls(webpage)
+        if pornhub_urls:
+            return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+
          # Look for embedded DrTuber player
          drtuber_urls = DrTuberIE._extract_urls(webpage)
          if drtuber_urls:
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py

index 0724efc09963d1f84fc7b24ec99d89b58e1bdfb6..40dbe6967fac2126b7bf6e6a1245768b3c039c8e 100644 (file)
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
                              (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
                              (?:www\.)?thumbzilla\.com/video/
                          )
-                        (?P<id>[0-9a-z]+)
+                        (?P<id>[\da-z]+)
                      '''
      _TESTS = [{
          'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
          'only_matching': True,
      }]
  
-    @classmethod
-    def _extract_url(cls, webpage):
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
-        if mobj:
-            return mobj.group('url')
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
+            webpage)
  
      def _extract_count(self, pattern, webpage, name):
          return str_to_int(self._search_regex(
author	Sergey M․ <dstftw@gmail.com>
	Sun, 6 Nov 2016 14:52:00 +0000 (21:52 +0700)
committer	Sergey M․ <dstftw@gmail.com>
	Sun, 6 Nov 2016 14:52:00 +0000 (21:52 +0700)
youtube_dl/extractor/generic.py		patch | blob | history
youtube_dl/extractor/pornhub.py		patch | blob | history