from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
+from .condenast import CondeNastIE
class GenericIE(InfoExtractor):
# funnyordie embed
{
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
- 'md5': '7cf780be104d40fea7bae52eed4a470e',
'info_dict': {
'id': '18e820ec3f',
'ext': 'mp4',
# Embedded TED video
{
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
- 'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
+ 'md5': '65fdff94098e4a607385a60c5177c638',
'info_dict': {
- 'id': '981',
+ 'id': '1969',
'ext': 'mp4',
- 'title': 'My web playroom',
- 'uploader': 'Ze Frank',
- 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
+ 'title': 'Hidden miracles of the natural world',
+ 'uploader': 'Louie Schwartzberg',
+ 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
}
},
# Embedded Ustream video
'skip_download': 'Requires rtmpdump'
}
},
- # smotri embed
- {
- 'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
- 'md5': 'ec40048448e9284c9a1de77bb188108b',
- 'info_dict': {
- 'id': 'v27008541fad',
- 'ext': 'mp4',
- 'title': 'Крым и Севастополь вошли в состав России',
- 'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
- 'duration': 900,
- 'upload_date': '20140318',
- 'uploader': 'rbctv_2012_4',
- 'uploader_id': 'rbctv_2012_4',
- },
- },
# Condé Nast embed
{
'url': 'http://www.wired.com/2014/04/honda-asimo/',
{
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
'info_dict': {
- 'id': 'jpSGZsgga_I',
+ 'id': '4vAffPZIT44',
'ext': 'mp4',
- 'title': 'Asphalt 8: Airborne - Launch Trailer',
+ 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
'uploader': 'Gameloft',
'uploader_id': 'gameloft',
- 'upload_date': '20130821',
- 'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
+ 'upload_date': '20140828',
+ 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
},
'params': {
'skip_download': True,
'uploader': 'education-portal.com',
},
},
+ {
+ 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+ 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+ 'info_dict': {
+ 'id': 'uxjb0lwrcz',
+ 'ext': 'mp4',
+ 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+ 'duration': 1715.0,
+ 'uploader': 'thoughtworks.wistia.com',
+ },
+ },
]
def report_following_redirect(self, new_url):
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
else:
- assert ':' in default_search
+ if ':' not in default_search:
+ default_search += ':'
return self.url_result(default_search + url)
url, smuggled_data = unsmuggle_url(url)
if mobj:
player_url = unescapeHTML(mobj.group('url'))
surl = smuggle_url(player_url, {'Referer': url})
- return self.url_result(surl, 'Vimeo')
+ return self.url_result(surl)
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
- r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+ r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
- return self.url_result(mobj.group(1), 'Vimeo')
+ return self.url_result(mobj.group(1))
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
+ # Look for embedded Dailymotion playlist player (#3822)
+ m = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
+ if m:
+ playlists = re.findall(
+ r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
+ if playlists:
+ return _playlist_from_matches(
+ playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
+
# Look for embedded Wistia player
match = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+ r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
+ embed_url = self._proto_relative_url(
+ unescapeHTML(match.group('url')))
return {
'_type': 'url_transparent',
- 'url': unescapeHTML(match.group('url')),
+ 'url': embed_url,
'ie_key': 'Wistia',
'uploader': video_uploader,
'title': video_title,
'id': video_id,
}
- match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+
+ match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
'_type': 'url_transparent',
if mobj is not None:
return self.url_result(mobj.group('url'), 'MLB')
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ webpage)
+ if mobj is not None:
+ return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
+
+def check_video(vurl):  # Return True when vurl's path contains a dot and its extension is not on the exclusion list below.
+vpath = compat_urlparse.urlparse(vurl).path  # Only the path component is inspected (presumably urllib's urlparse, so query string and fragment are ignored - confirm).
+vext = determine_ext(vpath)  # Extension as reported by determine_ext (helper defined outside this view).
+return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')  # Rejects dot-less paths and the listed Flash, image and subtitle extensions.
+
+def filter_video(urls):  # Return a list holding only the URLs from `urls` that check_video accepts.
+return list(filter(check_video, urls))  # list() materialises the result so callers can test emptiness with `if not found`.
+
# Start with something easy: JW Player in SWFObject
- found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
+ found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
if not found:
# Look for gorilla-vid style embedding
- found = re.findall(r'''(?sx)
+ found = filter_video(re.findall(r'''(?sx)
(?:
jw_plugins|
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
- .*?file\s*:\s*["\'](.*?)["\']''', webpage)
+ .*?file\s*:\s*["\'](.*?)["\']''', webpage))
if not found:
# Broaden the search a little bit
- found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+ found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
if not found:
# Broaden the findall a little bit: JWPlayer JS loader
- found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
+ found = filter_video(re.findall(
+ r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
if not found:
# Flow player
- found = re.findall(r'''(?xs)
+ found = filter_video(re.findall(r'''(?xs)
flowplayer\("[^"]+",\s*
\{[^}]+?\}\s*,
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
["']?url["']?\s*:\s*["']([^"']+)["']
- ''', webpage)
+ ''', webpage))
if not found:
# Try to find twitter cards info
- found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+ found = filter_video(re.findall(
+ r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
if not found:
# We look for Open Graph info:
# We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
- def check_video(vurl):
- vpath = compat_urlparse.urlparse(vurl).path
- vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg')
- found = list(filter(
- check_video,
- re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
+ found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)