[keek] extract uploader and uploader id with _search_regex
[youtube-dl] / youtube_dl / extractor / keek.py
index 72ad6a3d00b25f30f8d56e06bf5a15da32b8a911..99154112bb4fead1bd1e12fb689d7f9b911f08ba 100644 (file)
@@ -1,41 +1,37 @@
-import re
+# coding: utf-8
+from __future__ import unicode_literals
 
 from .common import InfoExtractor
 
 
 class KeekIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
-    IE_NAME = u'keek'
+    _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
+    IE_NAME = 'keek'
     _TEST = {
-        u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
-        u'file': u'NODfbab.mp4',
-        u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
-        u'info_dict': {
-            u"uploader": u"ytdl", 
-            u"title": u"test chars: \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
-        }
+        'url': 'https://www.keek.com/keek/NODfbab',
+        'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
+        'info_dict': {
+            'id': 'NODfbab',
+            'ext': 'mp4',
+            'title': 'test chars: "\'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de . - Video - Videos on Keek',
+            'description': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
+            'uploader': 'ytdl',
+            'uploader_id': 'eGT5bab',
+        },
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
+        video_id = self._match_id(url)
 
-        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
-        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
         webpage = self._download_webpage(url, video_id)
 
-        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
-            webpage, u'title')
-
-        uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
-            webpage, u'uploader', fatal=False)
-
-        info = {
-                'id': video_id,
-                'url': video_url,
-                'ext': 'mp4',
-                'title': video_title,
-                'thumbnail': thumbnail,
-                'uploader': uploader
+        return {
+            'id': video_id,
+            'url': self._og_search_video_url(webpage),
+            'ext': 'mp4',
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': self._search_regex(r'data-username="([^"]+)"', webpage, 'uploader', None),
+            'uploader_id': self._search_regex(r'data-user-id="([^"]+)"', webpage, 'uploader id', None),
         }
-        return [info]