[vporn] Make video URL regex more strict
author Sergey M․ <dstftw@gmail.com>
Mon, 15 Sep 2014 12:19:37 +0000 (19:19 +0700)
committer Sergey M․ <dstftw@gmail.com>
Mon, 15 Sep 2014 12:19:37 +0000 (19:19 +0700)
Some HD videos have garbage in place of a proper video URL, so only accept values that start with http:// or https://.

youtube_dl/extractor/vporn.py

index 426369c51bb524b41c63b8377b6ce3f489d0d4b7..2d23effccdff0ba49ff628ded1f72d044fe609d6 100644 (file)
@@ -11,22 +11,48 @@ from ..utils import (
 
 class VpornIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
-        'md5': 'facf37c1b86546fa0208058546842c55',
-        'info_dict': {
-            'id': '497944',
-            'display_id': 'violet-on-her-th-birthday',
-            'ext': 'mp4',
-            'title': 'Violet on her 19th birthday',
-            'description': 'Violet dances in front of the camera which is sure to get you horny.',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'kileyGrope',
-            'categories': ['Masturbation', 'Teen'],
-            'duration': 393,
-            'age_limit': 18,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
+            'md5': 'facf37c1b86546fa0208058546842c55',
+            'info_dict': {
+                'id': '497944',
+                'display_id': 'violet-on-her-th-birthday',
+                'ext': 'mp4',
+                'title': 'Violet on her 19th birthday',
+                'description': 'Violet dances in front of the camera which is sure to get you horny.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'kileyGrope',
+                'categories': ['Masturbation', 'Teen'],
+                'duration': 393,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+        {
+            'url': 'http://www.vporn.com/female/hana-shower/523564/',
+            'md5': 'ced35a4656198a1664cf2cda1575a25f',
+            'info_dict': {
+                'id': '523564',
+                'display_id': 'hana-shower',
+                'ext': 'mp4',
+                'title': 'Hana Shower',
+                'description': 'Hana showers at the bathroom.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'Hmmmmm',
+                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
+                'duration': 588,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -64,7 +90,7 @@ class VpornIE(InfoExtractor):
 
         formats = []
 
-        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
+        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
             video_url = video[1]
             fmt = {
                 'url': video_url,