2 from __future__ import unicode_literals
6 from .common import InfoExtractor
7 from ..compat import compat_str
17 class MediasetIE(InfoExtractor):
22 (?:www\.)?video\.mediaset\.it/
24 (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
25 player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
31 'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
32 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
36 'title': 'Quarta puntata',
37 'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
38 'thumbnail': r're:^https?://.*\.jpg$',
40 'creator': 'mediaset',
41 'upload_date': '20161107',
42 'series': 'Hello Goodbye',
43 'categories': ['reality'],
45 'expected_warnings': ['is not a supported codec'],
47 'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html',
48 'md5': '1276f966ac423d16ba255ce867de073e',
52 'title': 'Puntata del 25 maggio',
53 'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
54 'thumbnail': r're:^https?://.*\.jpg$',
56 'creator': 'mediaset',
57 'upload_date': '20180525',
59 'categories': ['infotainment'],
61 'expected_warnings': ['is not a supported codec'],
64 'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
65 'only_matching': True,
68 'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
69 'only_matching': True,
71 # Twitter variant of the player iframe (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
72 'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
73 'only_matching': True,
75 'url': 'mediaset:661824',
76 'only_matching': True,
80 def _extract_urls(webpage):
# Looks for <iframe> tags whose src is a video.mediaset.it
# playerIFrame.shtml / playerIFrameTwitter.shtml URL (http or https, optional
# "www.") that carries a numeric `id` query parameter. The src value, without
# its surrounding quotes, is captured in the named group `url`; the opening
# quote character is group 1 and must be matched again by the closing quote.
# NOTE(review): the lines that consume the matches are not visible in this
# excerpt -- presumably the captured URLs are collected and returned; confirm.
83 for mobj in re.finditer(
84 r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
87 def _real_extract(self, url):
88 video_id = self._match_id(url)
90 media_info = self._download_json(
91 'https://www.video.mediaset.it/html/metainfo.sjson',
92 video_id, 'Downloading media info', query={
96 media_id = try_get(media_info, lambda x: x['guid']) or video_id
98 video_list = self._download_json(
99 'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
100 video_id, 'Downloading video CDN JSON', query={
101 'streamid': media_id,
106 for format_url in video_list:
107 if '.ism' in format_url:
109 formats.extend(self._extract_ism_formats(
110 format_url, video_id, ism_id='mss', fatal=False))
111 except ExtractorError:
116 'format_id': determine_ext(format_url),
118 self._sort_formats(formats)
120 title = media_info['title']
123 media_info, lambda x: x['brand-info']['publisher'], compat_str)
125 media_info, lambda x: x['brand-info']['category'], compat_str)
126 categories = [category] if category else None
131 'description': media_info.get('short-description'),
132 'thumbnail': media_info.get('thumbnail'),
133 'duration': parse_duration(media_info.get('duration')),
135 'upload_date': unified_strdate(media_info.get('production-date')),
136 'webpage_url': media_info.get('url'),
137 'series': media_info.get('brand-value'),
138 'categories': categories,