_ Git - youtube-dl/blob - youtube_dl/extractor/picarto.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import time
   5
   6 from .common import InfoExtractor
   7 from ..compat import compat_str
   8 from ..utils import (
   9     ExtractorError,
  10     js_to_json,
  11     update_url_query,
  12     urlencode_postdata,
  13 )
  14
  15
  16 class PicartoIE(InfoExtractor):
  17     _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
  18     _TEST = {
  19         'url': 'https://picarto.tv/Setz',
  20         'info_dict': {
  21             'id': 'Setz',
  22             'ext': 'mp4',
  23             'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  24             'timestamp': int,
  25             'is_live': True
  26         },
  27         'skip': 'Stream is offline',
  28     }
  29
  30     @classmethod
  31     def suitable(cls, url):
  32         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
  33
  34     def _real_extract(self, url):
  35         channel_id = self._match_id(url)
  36         metadata = self._download_json(
  37             'https://api.picarto.tv/v1/channel/name/' + channel_id,
  38             channel_id)
  39
  40         if metadata.get('online') is False:
  41             raise ExtractorError('Stream is offline', expected=True)
  42
  43         cdn_data = self._download_json(
  44             'https://picarto.tv/process/channel', channel_id,
  45             data=urlencode_postdata({'loadbalancinginfo': channel_id}),
  46             note='Downloading load balancing info')
  47
  48         token = self._VALID_URL_RE.match(url).group('token') or 'public'
  49         params = {
  50             'con': int(time.time() * 1000),
  51             'token': token,
  52         }
  53
  54         prefered_edge = cdn_data.get('preferedEdge')
  55         formats = []
  56
  57         for edge in cdn_data['edges']:
  58             edge_ep = edge.get('ep')
  59             if not edge_ep or not isinstance(edge_ep, compat_str):
  60                 continue
  61             edge_id = edge.get('id')
  62             for tech in cdn_data['techs']:
  63                 tech_label = tech.get('label')
  64                 tech_type = tech.get('type')
  65                 preference = 0
  66                 if edge_id == prefered_edge:
  67                     preference += 1
  68                 format_id = []
  69                 if edge_id:
  70                     format_id.append(edge_id)
  71                 if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
  72                     format_id.append('hls')
  73                     formats.extend(self._extract_m3u8_formats(
  74                         update_url_query(
  75                             'https://%s/hls/%s/index.m3u8'
  76                             % (edge_ep, channel_id), params),
  77                         channel_id, 'mp4', preference=preference,
  78                         m3u8_id='-'.join(format_id), fatal=False))
  79                     continue
  80                 elif tech_type == 'video/mp4' or tech_label == 'MP4':
  81                     format_id.append('mp4')
  82                     formats.append({
  83                         'url': update_url_query(
  84                             'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
  85                             params),
  86                         'format_id': '-'.join(format_id),
  87                         'preference': preference,
  88                     })
  89                 else:
  90                     # rtmp format does not seem to work
  91                     continue
  92         self._sort_formats(formats)
  93
  94         mature = metadata.get('adult')
  95         if mature is None:
  96             age_limit = None
  97         else:
  98             age_limit = 18 if mature is True else 0
  99
 100         return {
 101             'id': channel_id,
 102             'title': self._live_title(channel_id),
 103             'is_live': True,
 104             'thumbnail': metadata.get('thumbnails', {}).get('web'),
 105             'age_limit': age_limit,
 106             'formats': formats,
 107         }
 108
 109
 110 class PicartoVodIE(InfoExtractor):
 111     _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
 112     _TESTS = [{
 113         'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
 114         'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
 115         'info_dict': {
 116             'id': 'ArtofZod_2017.12.12.00.13.23.flv',
 117             'ext': 'mp4',
 118             'title': 'ArtofZod_2017.12.12.00.13.23.flv',
 119             'thumbnail': r're:^https?://.*\.jpg'
 120         },
 121     }, {
 122         'url': 'https://picarto.tv/videopopout/Plague',
 123         'only_matching': True,
 124     }]
 125
 126     def _real_extract(self, url):
 127         video_id = self._match_id(url)
 128
 129         webpage = self._download_webpage(url, video_id)
 130
 131         vod_info = self._parse_json(
 132             self._search_regex(
 133                 r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
 134                 video_id),
 135             video_id, transform_source=js_to_json)
 136
 137         formats = self._extract_m3u8_formats(
 138             vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
 139             m3u8_id='hls')
 140         self._sort_formats(formats)
 141
 142         return {
 143             'id': video_id,
 144             'title': video_id,
 145             'thumbnail': vod_info.get('vodThumb'),
 146             'formats': formats,
 147         }