[youtube] Fix extraction (closes #20758, closes #20759, closes #20761, closes #20762...
[youtube-dl] / youtube_dl / extractor / twentyfourvideo.py
index af92b713b08e22343f84a282d3db59b355623f04..4b3b3e705eb8f758afb466970ed50b8e2ee102fd 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
@@ -12,7 +14,7 @@ from ..utils import (
 
 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
-    _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.24video.net/video/view/1044982',
@@ -22,7 +24,7 @@ class TwentyFourVideoIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Эротика каменного века',
             'description': 'Как смотрели порно в каменном веке.',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'SUPERTELO',
             'duration': 31,
             'timestamp': 1275937857,
@@ -37,13 +39,18 @@ class TwentyFourVideoIE(InfoExtractor):
     }, {
         'url': 'http://www.24video.me/video/view/1044982',
         'only_matching': True,
+    }, {
+        'url': 'http://www.24video.tube/video/view/2363750',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
 
         webpage = self._download_webpage(
-            'http://www.24video.net/video/view/%s' % video_id, video_id)
+            'http://%s/video/view/%s' % (host, video_id), video_id)
 
         title = self._og_search_title(webpage)
         description = self._html_search_regex(
@@ -53,8 +60,8 @@ class TwentyFourVideoIE(InfoExtractor):
         duration = int_or_none(self._og_search_property(
             'duration', webpage, 'duration', fatal=False))
         timestamp = parse_iso8601(self._search_regex(
-            r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
-            webpage, 'upload date'))
+            r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
+            webpage, 'upload date', fatal=False))
 
         uploader = self._html_search_regex(
             r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
@@ -65,15 +72,15 @@ class TwentyFourVideoIE(InfoExtractor):
             webpage, 'view count', fatal=False))
         comment_count = int_or_none(self._html_search_regex(
             r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
-            webpage, 'comment count', fatal=False))
+            webpage, 'comment count', default=None))
 
         # Sets some cookies
         self._download_xml(
-            r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
+            r'http://%s/video/xml/%s?mode=init' % (host, video_id),
             video_id, 'Downloading init XML')
 
         video_xml = self._download_xml(
-            'http://www.24video.net/video/xml/%s?mode=play' % video_id,
+            'http://%s/video/xml/%s?mode=play' % (host, video_id),
             video_id, 'Downloading video XML')
 
         video = xpath_element(video_xml, './/video', 'video', fatal=True)