[aftonbladet] add extractor for aftonbladet.se
[youtube-dl] / youtube_dl / extractor / pornhd.py
index c56740b8a927f7a8955a10b61eec3a9725a222b3..58f9c690e18b4330f7b47e862d0367913e17582a 100644 (file)
@@ -1,17 +1,20 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 from ..utils import compat_urllib_parse
 
+
 class PornHdIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
+    _VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
     _TEST = {
-        u'id': u'1962',
-        u'url': u'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
-        u'md5': u'4fe06e5108e8b524c35896f4c54c7155',
-        u'info_dict': {
-            u"title": u"sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
-            u"age_limit": 18,
+        'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+        'file': '1962.flv',
+        'md5': '35272469887dca97abd30abecc6cdf75',
+        'info_dict': {
+            "title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
+            "age_limit": 18,
         }
     }
 
@@ -20,23 +23,22 @@ class PornHdIE(InfoExtractor):
 
         video_id = mobj.group('video_id')
         video_title = mobj.group('video_title')
-        video_extension = 'flv'
 
         webpage = self._download_webpage(url, video_id)
 
+        next_url = self._html_search_regex(
+            r'&hd=(http.+?)&', webpage, 'video URL')
+        next_url = compat_urllib_parse.unquote(next_url)
 
-        self.report_extraction(video_id)
-
-        video_url = self._html_search_regex(
-            r'&hd=(http.+?)&', webpage, u'video URL')
-        video_url = compat_urllib_parse.unquote(video_url)
-
+        video_url = self._download_webpage(
+            next_url, video_id, note='Retrieving video URL',
+            errnote='Could not retrieve video URL')
         age_limit = 18
 
         return {
-            'id':        video_id,
-            'url':       video_url,
-            'ext':       video_extension,
-            'title':     video_title,
+            'id': video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
             'age_limit': age_limit,
         }