from .youtube import YoutubeIE
from ..compat import (
compat_etree_fromstring,
+ compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
compat_xml_parse_error,
from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE
+from .mediaset import MediasetIE
class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
- # Eagle.Platform embed (generic URL)
+ # EaglePlatform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'view_count': int,
'age_limit': 0,
},
+ 'params': {
+ 'skip_download': True,
+ },
},
- # ClipYou (Eagle.Platform) embed (custom URL)
+ # referrer protected EaglePlatform embed
+ {
+ 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+ 'info_dict': {
+ 'id': '582306',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3382,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # ClipYou (EaglePlatform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'duration': 216,
'view_count': int,
},
+ 'params': {
+ 'skip_download': True,
+ },
},
# Pladform embed
{
'title': 'Facebook video #599637780109885',
},
},
+ # Facebook <iframe> embed, plugin video
+ {
+ 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
+ 'info_dict': {
+ 'id': '1754168231264132',
+ 'ext': 'mp4',
+ 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
+ 'uploader': 'Tariq Ramadan (official)',
+ 'timestamp': 1496758379,
+ 'upload_date': '20170606',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# Facebook API embed
{
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
},
'add_ie': [WashingtonPostIE.ie_key()],
},
+ {
+ # Mediaset embed
+ 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
+ 'info_dict': {
+ 'id': '720642',
+ 'ext': 'mp4',
+ 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [MediasetIE.ie_key()],
+ },
+ {
+ # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+ 'url': 'https://tvrain.ru/amp/418921/',
+ 'md5': 'cc00413936695987e8de148b67d14f1d',
+ 'info_dict': {
+ 'id': '418921',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ },
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
content_type = head_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
- format_id = m.group('format_id')
+ format_id = compat_str(m.group('format_id'))
if format_id.endswith('mpegurl'):
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
elif format_id == 'f4m':
formats = self._extract_f4m_formats(url, video_id)
else:
formats = [{
- 'format_id': m.group('format_id'),
+ 'format_id': format_id,
'url': url,
'vcodec': 'none' if m.group('type') == 'audio' else None
}]
video_description = self._og_search_description(webpage, default=None)
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
+ info_dict.update({
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit,
+ })
+
# Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
- facebook_url = FacebookIE._extract_url(webpage)
- if facebook_url is not None:
- return self.url_result(facebook_url, 'Facebook')
+ facebook_urls = FacebookIE._extract_urls(webpage)
+ if facebook_urls:
+ return self.playlist_from_matches(facebook_urls, video_id, video_title)
# Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
- # Look for Eagle.Platform embeds
+ # Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url:
- return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
+ return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
- # Look for ClipYou (uses Eagle.Platform) embeds
+ # Look for ClipYou (uses EaglePlatform) embeds
mobj = re.search(
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
if mobj is not None:
return self.playlist_from_matches(
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
+ # Look for Mediaset embeds
+ mediaset_urls = MediasetIE._extract_urls(webpage)
+ if mediaset_urls:
+ return self.playlist_from_matches(
+ mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
+
+ def merge_dicts(dict1, dict2):
+ merged = {}
+ for k, v in dict1.items():
+ if v is not None:
+ merged[k] = v
+ for k, v in dict2.items():
+ if v is None:
+ continue
+ if (k not in merged or
+ (isinstance(v, compat_str) and v and
+ isinstance(merged[k], compat_str) and
+ not merged[k])):
+ merged[k] = v
+ return merged
+
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
if json_ld.get('url'):
- info_dict.update({
- 'title': video_title or info_dict['title'],
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'age_limit': age_limit
- })
- info_dict.update(json_ld)
- return info_dict
+ return merge_dicts(json_ld, info_dict)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if jwplayer_data:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url)
- if not info.get('title'):
- info['title'] = video_title
- return info
+ return merge_dicts(info, info_dict)
def check_video(vurl):
if YoutubeIE.suitable(vurl):