Merge branch 'pornhub_unquote_password' of github.com:MikeCol/youtube-dl

[youtube-dl] / youtube_dl / extractor / pornhd.py
diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py

index c56740b8a927f7a8955a10b61eec3a9725a222b3..58f9c690e18b4330f7b47e862d0367913e17582a 100644 (file)
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -1,17 +1,20 @@
+from __future__ import unicode_literals
+
  import re
  
  from .common import InfoExtractor
  from ..utils import compat_urllib_parse
  
+
  class PornHdIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
+    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
      _TEST = {
-        u'id': u'1962',
-        u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
-        u'md5': u'4fe06e5108e8b524c35896f4c54c7155',
-        u'info_dict': {
-            u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
-            u"age_limit": 18,
+        'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+        'file': '1962.flv',
+        'md5': '35272469887dca97abd30abecc6cdf75',
+        'info_dict': {
+            "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
+            "age_limit": 18,
          }
      }
  
@@ -20,23 +23,22 @@ class PornHdIE(InfoExtractor):
  
          video_id = mobj.group('video_id')
          video_title = mobj.group('video_title')
-        video_extension = 'flv'
  
          webpage = self._download_webpage(url, video_id)
  
+        next_url = self._html_search_regex(
+            r'&hd=(http.+?)&', webpage, 'video URL')
+        next_url = compat_urllib_parse.unquote(next_url)
  
-        self.report_extraction(video_id)
-
-        video_url = self._html_search_regex(
-            r'&hd=(http.+?)&', webpage, u'video URL')
-        video_url = compat_urllib_parse.unquote(video_url)
-
+        video_url = self._download_webpage(
+            next_url, video_id, note='Retrieving video URL',
+            errnote='Could not retrieve video URL')
          age_limit = 18
  
          return {
-            'id':        video_id,
-            'url':       video_url,
-            'ext':       video_extension,
-            'title':     video_title,
+            'id': video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
              'age_limit': age_limit,
          }