_ Git - youtube-dl/blob - youtube_dl/extractor/meipai.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from ..utils import parse_iso8601
   5 from .common import InfoExtractor
   6
   7
   8 class MeipaiIE(InfoExtractor):
   9     IE_DESC = '美拍'
  10     _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
  11     _TESTS = [
  12         {
  13             'url': 'http://www.meipai.com/media/531697625',
  14             'md5': 'e3e9600f9e55a302daecc90825854b4f',
  15             'info_dict': {
  16                 'id': '531697625',
  17                 'ext': 'mp4',
  18                 'title': '#葉子##阿桑##余姿昀##超級女聲#',
  19                 'description': '#葉子##阿桑##余姿昀##超級女聲#',
  20                 'thumbnail': 're:^https?://.*\.jpg$',
  21                 'creator': '她她-TATA',
  22                 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
  23                 'release_date': 1465492420,
  24             }
  25         },
  26         {
  27             'url': 'http://www.meipai.com/media/576409659',
  28             'md5': '2e807c16ebe67b8b6b3c8dcacbc32f48',
  29             'info_dict': {
  30                 'id': '576409659',
  31                 'ext': 'mp4',
  32                 'title': '#失語者##蔡健雅##吉他彈唱#',
  33                 'description': '#失語者##蔡健雅##吉他彈唱#',
  34                 'thumbnail': 're:^https?://.*\.jpg$',
  35                 'creator': '她她-TATA',
  36                 'tags': ['失語者', '蔡健雅', '吉他彈唱'],
  37                 'release_date': 1472534847,
  38             }
  39         },
  40         # record of live streaming
  41         {
  42             'url': 'http://www.meipai.com/media/585526361',
  43             'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
  44             'info_dict': {
  45                 'id': '585526361',
  46                 'ext': 'mp4',
  47                 'title': '姿昀和善願 練歌練琴啦😁😁😁',
  48                 'description': '姿昀和善願 練歌練琴啦😁😁😁',
  49                 'thumbnail': 're:^https?://.*\.jpg$',
  50                 'creator': '她她-TATA',
  51                 'release_date': 1474311799,
  52             }
  53         },
  54     ]
  55
  56     def _real_extract(self, url):
  57         video_id = self._match_id(url)
  58         webpage = self._download_webpage(url, video_id)
  59
  60         title = self._og_search_title(webpage, default=None)
  61         if title is None:
  62             # fall back to text used in title
  63             title = self._html_search_regex(
  64                 r'<title[^>]*>(.+)</title>', webpage, 'title')
  65
  66         release_date = self._og_search_property(
  67             'video:release_date', webpage, 'release date', fatal=False)
  68         release_date = parse_iso8601(release_date)
  69
  70         tags = self._og_search_property(
  71             'video:tag', webpage, 'tags', default='').split(',')
  72
  73         info = {
  74             'id': video_id,
  75             'title': title,
  76             'thumbnail': self._og_search_thumbnail(webpage),
  77             'description': self._og_search_description(webpage),
  78             'release_date': release_date,
  79             'creator': self._og_search_property(
  80                 'video:director', webpage, 'creator', fatal=False),
  81             'tags': tags,
  82         }
  83
  84         keywords = self._html_search_meta(
  85             'keywords', webpage, 'keywords', default=[])
  86
  87         if '直播回放' in keywords:
  88             # recorded playback of live streaming
  89             m3u8_url = self._html_search_regex(
  90                 r'file:\s*encodeURIComponent\(["\'](.+)["\']\)',
  91                 webpage,
  92                 'm3u8_url')
  93             info['formats'] = self._extract_m3u8_formats(
  94                 m3u8_url, video_id, 'mp4', 'm3u8_native')
  95         else:
  96             # regular uploaded video
  97             info['url'] = self._og_search_video_url(webpage)
  98
  99         return info