2 from __future__ import unicode_literals
6 from .common import InfoExtractor
7 from ..compat import compat_str
16 class MediasetIE(InfoExtractor):
21 (?:www\.)?video\.mediaset\.it/
23 (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
24 player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
30 'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
31 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
35 'title': 'Quarta puntata',
36 'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
37 'thumbnail': r're:^https?://.*\.jpg$',
39 'creator': 'mediaset',
40 'upload_date': '20161107',
41 'series': 'Hello Goodbye',
42 'categories': ['reality'],
44 'expected_warnings': ['is not a supported codec'],
46 'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html',
47 'md5': '1276f966ac423d16ba255ce867de073e',
51 'title': 'Puntata del 25 maggio',
52 'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
53 'thumbnail': r're:^https?://.*\.jpg$',
55 'creator': 'mediaset',
56 'upload_date': '20180525',
58 'categories': ['infotainment'],
60 'expected_warnings': ['HTTP Error 403: Forbidden'],
63 'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
64 'only_matching': True,
67 'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
68 'only_matching': True,
70 # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
71 'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
72 'only_matching': True,
74 'url': 'mediaset:661824',
75 'only_matching': True,
# Looks for embedded Mediaset player iframes using re.finditer.
# NOTE(review): this listing omits the lines around the loop (what is produced
# from each match, and the string passed to re.finditer) -- presumably the
# "url" group of every match found in `webpage` is returned; confirm against
# the complete file.
79 def _extract_urls(webpage):
82 for mobj in re.finditer(
# Pattern: an <iframe> tag whose quoted src attribute is an http(s)
# video.mediaset.it/player/playerIFrame.shtml (or playerIFrameTwitter.shtml)
# URL whose query string contains id=<digits>. The full URL is captured in the
# named group "url"; the \1 backreference requires the closing quote to be the
# same character as the opening one.
83 r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
86 def _real_extract(self, url):
87 video_id = self._match_id(url)
89 video = self._download_json(
90 'https://www.video.mediaset.it/html/metainfo.sjson',
91 video_id, 'Downloading media info', query={
95 title = video['title']
96 media_id = video.get('guid') or video_id
98 video_list = self._download_json(
99 'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
100 video_id, 'Downloading video CDN JSON', query={
101 'streamid': media_id,
106 for format_url in video_list:
107 ext = determine_ext(format_url)
109 formats.extend(self._extract_m3u8_formats(
110 format_url, video_id, 'mp4', entry_protocol='m3u8_native',
111 m3u8_id='hls', fatal=False))
113 formats.extend(self._extract_mpd_formats(
114 format_url, video_id, mpd_id='dash', fatal=False))
115 elif ext == 'ism' or '.ism' in format_url:
116 formats.extend(self._extract_ism_formats(
117 format_url, video_id, ism_id='mss', fatal=False))
121 'format_id': determine_ext(format_url),
123 self._sort_formats(formats)
126 video, lambda x: x['brand-info']['publisher'], compat_str)
128 video, lambda x: x['brand-info']['category'], compat_str)
129 categories = [category] if category else None
134 'description': video.get('short-description'),
135 'thumbnail': video.get('thumbnail'),
136 'duration': parse_duration(video.get('duration')),
138 'upload_date': unified_strdate(video.get('production-date')),
139 'webpage_url': video.get('url'),
140 'series': video.get('brand-value'),
141 'season': video.get('season'),
142 'categories': categories,