_ Git - youtube-dl/blob - youtube_dl/extractor/arkenaplay.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     int_or_none,
   6     parse_iso8601
   7 )
   8 import re
   9
  10
  11 class ArkenaPlayIE(InfoExtractor):
  12     IE_NAME = 'ArkenaPlay'
  13     _VALID_URL = r'(?P<shortcut>arkena:(?P<version>[0-9]+):(?P<mediatype>[A-Za-z0-9]+):(?P<mediaId>[^:]+):(?P<widgetsettingId>[A-Za-z0-9]+):(?P<accountId>[A-Za-z0-9]+))|(?:(?P<host>https?://(?:www\.)?play\..*\..*)/embed/(?:avp/v[0-9]+/player/[A-Za-z0-9]+/)?(?P<id>.*)?)'
  14
  15     _TESTS = [{
  16         'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
  17         'md5': '6cea4f7d13810464ef8485a924fc3333',
  18         'info_dict': {
  19             'id': '327336',
  20             'url': 're:http://httpod.scdn.arkena.com/11970/327336.*',
  21             'ext': 'mp4',
  22             'title': '327336',
  23             'upload_date': '20160225',
  24             'timestamp': 1456391602
  25         }
  26     }, {
  27         # Shortcut for: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
  28         'url': 'arkena:2:media:b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe:1:129411',
  29         'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
  30         'info_dict': {
  31             'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
  32             'url': 'http://88e04ec095b07cd1aa3ea588be47e870.httpcache0.90034-httpcache0.dna.qbrick.com/90034-httpcache0/4bf759a1-00090034/bbb_sunflower_2160p_60fps_normal_720p.mp4',
  33             'ext': 'mp4',
  34             'title': 'Big Buck Bunny',
  35             'description': 'Royalty free test video',
  36             'upload_date': '20150528',
  37             'timestamp': 1432816365
  38         }
  39     }]
  40
  41     def _real_extract(self, url):
  42         mobj = re.match(self._VALID_URL, url)
  43         if mobj.group('shortcut'):
  44             version = mobj.group('version')
  45             mediatype = mobj.group('mediatype')
  46             mediaid = mobj.group('mediaId')
  47             widgetsettingid = mobj.group('widgetsettingId')
  48             accountid = mobj.group('accountId')
  49             display_id = '{0}:{1}:{2}:{3}'.format(mediatype, mediaid, widgetsettingid, accountid)
  50             media_url = 'https://play.arkena.com/config/avp/v{0}/player/{1}/{2}/{3}/{4}/?callbackMethod=?'.format(
  51                 version, mediatype, mediaid, widgetsettingid, accountid)
  52         else:
  53             display_id = self._search_regex(self._VALID_URL, url, 'host_name', group='id')
  54             webpage = self._download_webpage(url, display_id)
  55
  56             media_url_regex = '"(?P<mediainfo>(?P<host>.*)/(c|C)onfig/.*\?callbackMethod=\?)"'
  57             media_url = self._html_search_regex(media_url_regex, webpage, 'arkena_media_info_url')
  58             hostname = self._html_search_regex(media_url_regex, webpage, 'arkena_media_host', group='host')
  59             if not hostname:
  60                 hostname = self._search_regex(self._VALID_URL, url, 'host_name', group='host')
  61                 media_url = hostname + media_url
  62
  63         # Extract the required info of the media files gathered in a dictionary
  64         arkena_info = self._download_webpage(media_url, 'arkena_info_')
  65         arkena_info_regex = r'\?\((?P<json>.*)\);'
  66         media_dict = self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
  67                                       display_id)
  68
  69         # All videos are part of a playlist, a single video is also put in a playlist
  70         playlist_items = media_dict.get('Playlist', [])
  71         if len(playlist_items) == 0:
  72             return self.url_result(url, 'Generic')
  73         elif len(playlist_items) == 1:
  74             arkena_media_info = playlist_items[0]
  75             return self.__extract_from_playlistentry(arkena_media_info)
  76         else:
  77             entries_info = []
  78             for arkena_playlist_item in playlist_items:
  79                 entries_info.append(self.__extract_from_playlistentry(arkena_playlist_item))
  80             return {
  81                 'id': display_id,
  82                 'entries': entries_info
  83             }
  84
  85     def __extract_from_playlistentry(self, arkena_playlistentry_info):
  86         media_info = arkena_playlistentry_info.get('MediaInfo', {})
  87         thumbnails = self.__get_thumbnails(media_info)
  88         title = media_info.get('Title')
  89         description = media_info.get('Description')
  90         video_id = media_info.get('VideoId')
  91         timestamp = parse_iso8601(media_info.get('PublishDate'))
  92         formats = self.__get_video_formats(arkena_playlistentry_info, video_id)
  93         return {
  94             'id': video_id,
  95             'title': title,
  96             'formats': formats,
  97             'thumbnails': thumbnails,
  98             'description': description,
  99             'timestamp': timestamp
 100         }
 101
 102     def __get_thumbnails(self, arkena_mediainfo):
 103         thumbnails = []
 104         thumbnails_info = arkena_mediainfo.get('Poster')
 105         if not thumbnails_info:
 106             return None
 107         for thumbnail in thumbnails_info:
 108             thumbnail_url = thumbnail.get('Url')
 109             if not thumbnail_url:
 110                 continue
 111             thumbnails.append({
 112                 'url': thumbnail_url,
 113                 'width': int_or_none(thumbnail.get('Size'))
 114             })
 115         return thumbnails
 116
 117     def __get_video_formats(self, media_files_info, video_id):
 118         formats = []
 119         media_files = media_files_info.get('MediaFiles')
 120         if not media_files:
 121             return None
 122
 123         for type_name, video_files_json in media_files.iteritems():
 124             for video_info in video_files_json:
 125                 video_url = video_info.get('Url')
 126                 if not video_url:
 127                     continue
 128                 type = video_info.get('Type')
 129                 if type_name in ['Mp4', 'WebM', 'Flash']:
 130                     bitrate = int_or_none(video_info.get('Bitrate'), scale=1000)
 131                     ext = None
 132                     if type == 'video/mp4':
 133                         ext = 'mp4'
 134                     elif type == 'video/webm':
 135                         ext = 'webm'
 136                     elif type == 'video/x-flv':
 137                         ext = 'flv'
 138                     formats.append({
 139                         'url': video_url,
 140                         'ext': ext,
 141                         'tbr': bitrate
 142                     })
 143                 elif type_name == 'M3u8' and type == 'application/x-mpegURL':
 144                     formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
 145                 elif type_name == 'Flash' and type == 'application/hds+xml':
 146                     formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
 147                 elif type_name == 'Dash' and type == 'application/dash+xml':
 148                     formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
 149
 150         self._sort_formats(formats)
 151         return formats