Add support for https for all extractors as preventive and future-proof measure
[youtube-dl] / youtube_dl / extractor / pornhd.py
index 954dfccb75954d50a9a46bc14bdb1d0dcbd5588c..39b53ecf68c77786f18956040bf7ccac4fd6dbc5 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class PornHdIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
+    _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
     _TEST = {
         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
         'md5': '956b8ca569f7f4d8ec563e2c41598441',
@@ -36,7 +36,8 @@ class PornHdIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id or video_id)
 
         title = self._html_search_regex(
-            r'<title>(.+) porn HD.+?</title>', webpage, 'title')
+            [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
+             r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
         description = self._html_search_regex(
             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
         view_count = int_or_none(self._html_search_regex(
@@ -46,16 +47,17 @@ class PornHdIE(InfoExtractor):
 
         quality = qualities(['sd', 'hd'])
         sources = json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+            webpage, 'sources')))
         formats = []
-        for container, s in sources.items():
-            for qname, video_url in s.items():
-                formats.append({
-                    'url': video_url,
-                    'container': container,
-                    'format_id': '%s-%s' % (container, qname),
-                    'quality': quality(qname),
-                })
+        for qname, video_url in sources.items():
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'format_id': qname,
+                'quality': quality(qname),
+            })
         self._sort_formats(formats)
 
         return {