from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .vimeo import VimeoIE
-from .dailymotion import DailymotionCloudIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionCloudIE,
+)
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
+from .arkena import ArkenaIE
from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
+from .soundcloud import SoundcloudIE
+from .vbox7 import Vbox7IE
class GenericIE(InfoExtractor):
'url': 'http://www.vestifinance.ru/articles/25753',
'info_dict': {
'id': '25753',
- 'title': 'Ð\92еÑ\81Ñ\82и Ðкономика â\80\95 Ð\9fÑ\80Ñ\8fмÑ\8bе Ñ\82Ñ\80анÑ\81лÑ\8fÑ\86ии Ñ\81 ФоÑ\80Ñ\83ма-вÑ\8bÑ\81Ñ\82авки "Ð\93оÑ\81заказ-2013"',
+ 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
},
'playlist': [{
'info_dict': {
'ext': 'mp4',
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
'description': 'Two valets share their love for movie star Liam Neesons.',
+ 'timestamp': 1349922600,
+ 'upload_date': '20121011',
},
},
# YouTube embed via <data-embed-url="">
'upload_date': '20141029',
}
},
+ # Soundcloud multiple embeds
+ {
+ 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
+ 'info_dict': {
+ 'id': '52809',
+ 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
+ },
+ 'playlist_mincount': 7,
+ },
# Livestream embed
{
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
'description': 'md5:601cb790edd05908957dae8aaa866465',
'upload_date': '20150220',
},
+ 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
},
# jwplayer YouTube
{
'uploader': 'www.hudl.com',
},
},
+ # twitter:player:stream embed
+ {
+ 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
+ 'info_dict': {
+ 'id': 'master',
+ 'ext': 'mp4',
+ 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
+ 'uploader': 'www.rtl.be',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
# twitter:player embed
{
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
'uploader': 'cylus cyrus',
},
},
+ {
+ # video stored on custom kaltura server
+ 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+ 'md5': '537617d06e64dfed891fa1593c4b30cc',
+ 'info_dict': {
+ 'id': '0_1iotm5bh',
+ 'ext': 'mp4',
+ 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+ 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+ 'uploader_id': 'videos.expansion@el-mundo.net',
+ 'upload_date': '20150429',
+ 'timestamp': 1430303472,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Non-standard Vimeo embed
+ 'url': 'https://openclassrooms.com/courses/understanding-the-web',
+ 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+ 'info_dict': {
+ 'id': '148867247',
+ 'ext': 'mp4',
+ 'title': 'Understanding the web - Teaser',
+ 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+ 'upload_date': '20151214',
+ 'uploader': 'OpenClassrooms',
+ 'uploader_id': 'openclassrooms',
+ },
+ 'add_ie': ['Vimeo'],
+ },
+ {
+ 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
+ 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
+ 'info_dict': {
+ 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': 'Royalty free test video',
+ 'timestamp': 1432816365,
+ 'upload_date': '20150528',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [ArkenaIE.ie_key()],
+ },
+ {
+ 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
+ 'info_dict': {
+ 'id': '1c7141f46c',
+ 'ext': 'mp4',
+ 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [Vbox7IE.ie_key()],
+ },
+ # {
+ # # TODO: find another test
+ # # http://schema.org/VideoObject
+ # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+ # 'md5': '888dcf08b7ea671381f00fab74692755',
+ # 'info_dict': {
+ # 'id': 'nyvTSJMKId',
+ # 'ext': 'mp4',
+ # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+ # 'description': '#love for cats.',
+ # 'timestamp': 1461244995,
+ # 'upload_date': '20160421',
+ # },
+ # 'params': {
+ # 'force_generic_extractor': True,
+ # },
+ # }
]
def report_following_redirect(self, new_url):
if matches:
return _playlist_from_matches(matches, lambda m: m[-1])
- # Look for embedded Dailymotion player
- matches = re.findall(
- r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
+ matches = DailymotionIE._extract_urls(webpage)
if matches:
- return _playlist_from_matches(
- matches, lambda m: unescapeHTML(m[1]))
+ return _playlist_from_matches(matches)
# Look for embedded Dailymotion playlist player (#3822)
m = re.search(
return self.url_result(myvi_url)
# Look for embedded soundcloud player
- mobj = re.search(
- r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
- webpage)
- if mobj is not None:
- url = unescapeHTML(mobj.group('url'))
- return self.url_result(url)
+ soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+ if soundcloud_urls:
+ return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
# Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
if digiteka_url:
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
+ # Look for Arkena embeds
+ arkena_url = ArkenaIE._extract_url(webpage)
+ if arkena_url:
+ return self.url_result(arkena_url, ArkenaIE.ie_key())
+
# Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj:
return self.url_result(
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+ # Look for VODPlatform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
+
# Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
if instagram_embed_url is not None:
'uploader': video_uploader,
}
- # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
- embed_url = self._html_search_meta('twitter:player', webpage, default=None)
- if embed_url:
- return self.url_result(embed_url)
+ # Look for VBOX7 embeds
+ vbox7_url = Vbox7IE._extract_url(webpage)
+ if vbox7_url:
+ return self.url_result(vbox7_url, Vbox7IE.ie_key())
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(
+ webpage, video_id, default={}, expected_type='VideoObject')
+ if json_ld.get('url'):
+ info_dict.update({
+ 'title': video_title or info_dict['title'],
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit
+ })
+ info_dict.update(json_ld)
+ return info_dict
def check_video(vurl):
if YoutubeIE.suitable(vurl):
r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
if not found:
# Try to find twitter cards info
+ # twitter:player:stream should be checked before twitter:player since
+ # it is expected to contain a raw stream (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
found = filter_video(re.findall(
r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
if not found:
'_type': 'url',
'url': new_url,
}
+
+ if not found:
+ # twitter:player is a https URL to iframe player that may or may not
+ # be supported by youtube-dl thus this is checked the very last (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+ if embed_url:
+ return self.url_result(embed_url)
+
if not found:
raise UnsupportedError(url)