_ Git - youtube-dl/blob - youtube_dl/extractor/twentyfourvideo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     parse_iso8601,
   7     int_or_none,
   8 )
   9
  10
  11 class TwentyFourVideoIE(InfoExtractor):
  12     IE_NAME = '24video'
  13     _VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
  14
  15     _TESTS = [
  16         {
  17             'url': 'http://www.24video.net/video/view/1044982',
  18             'md5': 'd041af8b5b4246ea466226a0d6693345',
  19             'info_dict': {
  20                 'id': '1044982',
  21                 'ext': 'mp4',
  22                 'title': 'Эротика каменного века',
  23                 'description': 'Как смотрели порно в каменном веке.',
  24                 'thumbnail': 're:^https?://.*\.jpg$',
  25                 'uploader': 'SUPERTELO',
  26                 'duration': 31,
  27                 'timestamp': 1275937857,
  28                 'upload_date': '20100607',
  29                 'age_limit': 18,
  30                 'like_count': int,
  31                 'dislike_count': int,
  32             },
  33         },
  34         {
  35             'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
  36             'only_matching': True,
  37         }
  38     ]
  39
  40     def _real_extract(self, url):
  41         video_id = self._match_id(url)
  42
  43         webpage = self._download_webpage(
  44             'http://www.24video.net/video/view/%s' % video_id, video_id)
  45
  46         title = self._og_search_title(webpage)
  47         description = self._html_search_regex(
  48             r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
  49         thumbnail = self._og_search_thumbnail(webpage)
  50         duration = int_or_none(self._og_search_property(
  51             'duration', webpage, 'duration', fatal=False))
  52         timestamp = parse_iso8601(self._search_regex(
  53             r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
  54             webpage, 'upload date'))
  55
  56         uploader = self._html_search_regex(
  57             r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
  58             webpage, 'uploader', fatal=False)
  59
  60         view_count = int_or_none(self._html_search_regex(
  61             r'<span class="video-views">(\d+) просмотр',
  62             webpage, 'view count', fatal=False))
  63         comment_count = int_or_none(self._html_search_regex(
  64             r'<div class="comments-title" id="comments-count">(\d+) комментари',
  65             webpage, 'comment count', fatal=False))
  66
  67         # Sets some cookies
  68         self._download_xml(
  69             r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
  70             video_id, 'Downloading init XML')
  71
  72         video = self._download_xml(
  73             'http://www.24video.net/video/xml/%s?mode=play' % video_id,
  74             video_id, 'Downloading video XML').find('.//video')
  75
  76         formats = [{
  77             'url': video.attrib['url'],
  78         }]
  79
  80         like_count = int_or_none(video.get('ratingPlus'))
  81         dislike_count = int_or_none(video.get('ratingMinus'))
  82         age_limit = 18 if video.get('adult') == 'true' else 0
  83
  84         return {
  85             'id': video_id,
  86             'title': title,
  87             'description': description,
  88             'thumbnail': thumbnail,
  89             'uploader': uploader,
  90             'duration': duration,
  91             'timestamp': timestamp,
  92             'view_count': view_count,
  93             'comment_count': comment_count,
  94             'like_count': like_count,
  95             'dislike_count': dislike_count,
  96             'age_limit': age_limit,
  97             'formats': formats,
  98         }