[tube8] Improve _VALID_URL and add display_id
author Sergey M <dstftw@gmail.com>
Sat, 20 Sep 2014 23:37:11 +0000 (06:37 +0700)
committer Sergey M <dstftw@gmail.com>
Sat, 20 Sep 2014 23:37:11 +0000 (06:37 +0700)
youtube_dl/extractor/tube8.py

index 39f20c54688e9371f5a299e04b84919de2db4f4e..64a1e903022a78fa3a2b15eeff5eed20afce568d 100644 (file)
@@ -14,27 +14,35 @@ from ..aes import aes_decrypt_text
 
 
 class Tube8IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:gay/|shemale/)?(?:[^/]+/){2}(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
-        'md5': '44bf12b98313827dd52d35b8706a4ea0',
-        'info_dict': {
-            'id': '229795',
-            'ext': 'mp4',
-            'description': 'hot teen Kasia grinding',
-            'uploader': 'unknown',
-            'title': 'Kasia music video',
-            'age_limit': 18,
-        }
-    }
+    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+            'md5': '44bf12b98313827dd52d35b8706a4ea0',
+            'info_dict': {
+                'id': '229795',
+                'display_id': 'kasia-music-video',
+                'ext': 'mp4',
+                'description': 'hot teen Kasia grinding',
+                'uploader': 'unknown',
+                'title': 'Kasia music video',
+                'age_limit': 18,
+            }
+        },
+        {
+            'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
 
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
-        webpage = self._download_webpage(req, video_id)
+        webpage = self._download_webpage(req, display_id)
 
         flashvars = json.loads(self._html_search_regex(
             r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
@@ -70,6 +78,7 @@ class Tube8IE(InfoExtractor):
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'url': video_url,
             'title': title,
             'description': description,