+ video_uploader = self._search_regex(
+ r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
+
+ # Helper method
+ def _playlist_from_matches(matches, getter, ie=None):
+ urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
+ return self.playlist_result(
+ urlrs, playlist_id=video_id, playlist_title=video_title)
+
+ # Look for BrightCove:
+ bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+ if bc_urls:
+ self.to_screen('Brightcove video detected.')
+ entries = [{
+ '_type': 'url',
+ 'url': smuggle_url(bc_url, {'Referer': url}),
+ 'ie_key': 'Brightcove'
+ } for bc_url in bc_urls]
+
+ return {
+ '_type': 'playlist',
+ 'title': video_title,
+ 'id': video_id,
+ 'entries': entries,
+ }
+
+ # Look for embedded (iframe) Vimeo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
+ if mobj:
+ player_url = unescapeHTML(mobj.group('url'))
+ surl = smuggle_url(player_url, {'Referer': url})
+ return self.url_result(surl, 'Vimeo')
+
+ # Look for embedded (swf embed) Vimeo player
+ mobj = re.search(
+ r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+ if mobj:
+ return self.url_result(mobj.group(1), 'Vimeo')
+
+ # Look for embedded YouTube player
+ matches = re.findall(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+ (?:embed|v)/.+?)
+ \1''', webpage)
+ if matches:
+ return _playlist_from_matches(
+ matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
+
+ # Look for embedded Dailymotion player
+ matches = re.findall(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
+ if matches:
+ return _playlist_from_matches(
+ matches, lambda m: unescapeHTML(m[1]))
+
+ # Look for embedded Wistia player
+ match = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+ if match:
+ return {
+ '_type': 'url_transparent',
+ 'url': unescapeHTML(match.group('url')),
+ 'ie_key': 'Wistia',
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'id': video_id,
+ }
+
+ # Look for embedded blip.tv player
+ mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
+ if mobj:
+ return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
+ mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
+ if mobj:
+ return self.url_result(mobj.group(1), 'BlipTV')
+
+ # Look for embedded condenast player
+ matches = re.findall(
+ r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
+ webpage)
+ if matches:
+ return {
+ '_type': 'playlist',
+ 'entries': [{
+ '_type': 'url',
+ 'ie_key': 'CondeNast',
+ 'url': ma,
+ } for ma in matches],
+ 'title': video_title,
+ 'id': video_id,
+ }
+
+ # Look for Bandcamp pages with custom domain
+ mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+ if mobj is not None:
+ burl = unescapeHTML(mobj.group(1))
+ # Don't set the extractor because it can be a track url or an album
+ return self.url_result(burl)
+
+ # Look for embedded Vevo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Ooyala videos
+ mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+ re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ if mobj is not None:
+ return OoyalaIE._build_url_result(mobj.group('ec'))
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
+ # Look for MPORA videos
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Mpora')
+
+ # Look for embedded NovaMov-based player
+ mobj = re.search(
+ r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
+ (?P<url>http://(?:(?:embed|www)\.)?
+ (?:novamov\.com|
+ nowvideo\.(?:ch|sx|eu|at|ag|co)|
+ videoweed\.(?:es|com)|
+ movshare\.(?:net|sx|ag)|
+ divxstage\.(?:eu|net|ch|co|at|ag))
+ /embed\.php.+?)\1''', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded Facebook player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Facebook')
+
+ # Look for embedded VK player
+ mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'VK')
+
+ # Look for embedded ivi player
+ mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ivi')
+
+ # Look for embedded Huffington Post player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'HuffPost')
+
+ # Look for embed.ly
+ mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+ mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
+ if mobj is not None:
+ return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+
+ # Look for funnyordie embed
+ matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+ if matches:
+ return _playlist_from_matches(
+ matches, getter=unescapeHTML, ie='FunnyOrDie')
+
+ # Look for embedded RUTV player
+ rutv_url = RUTVIE._extract_url(webpage)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ # Look for embedded TED player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'TED')
+
+ # Look for embedded Ustream videos
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ustream')
+
+ # Look for embedded arte.tv player
+ mobj = re.search(
+ r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
+ # Look for embedded smotri.com player
+ smotri_url = SmotriIE._extract_url(webpage)
+ if smotri_url:
+ return self.url_result(smotri_url, 'Smotri')
+
+ # Look for embedded soundcloud player
+ mobj = re.search(
+ r'<iframe src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
+ webpage)
+ if mobj is not None:
+ url = unescapeHTML(mobj.group('url'))
+ return self.url_result(url)
+
+ # Look for embedded vulture.com player
+ mobj = re.search(
+ r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
+ webpage)
+ if mobj is not None:
+ url = unescapeHTML(mobj.group('url'))
+ return self.url_result(url, ie='Vulture')
+
+ # Look for embedded mtvservices player
+ mobj = re.search(
+ r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
+ webpage)
+ if mobj is not None:
+ url = unescapeHTML(mobj.group('url'))
+ return self.url_result(url, ie='MTVServicesEmbedded')
+
+ # Look for embedded yahoo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Yahoo')
+
+ # Look for embedded sbs.com.au player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'SBS')
+
+ # Start with something easy: JW Player in SWFObject
+ found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
+ if not found:
+ # Look for gorilla-vid style embedding
+ found = re.findall(r'''(?sx)
+ (?:
+ jw_plugins|
+ JWPlayerOptions|
+ jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
+ )
+ .*?file\s*:\s*["\'](.*?)["\']''', webpage)
+ if not found:
+ # Broaden the search a little bit
+ found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+ if not found:
+ # Broaden the findall a little bit: JWPlayer JS loader
+ found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
+ if not found:
+ # Flow player
+ found = re.findall(r'''(?xs)
+ flowplayer\("[^"]+",\s*
+ \{[^}]+?\}\s*,
+ \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+ ["']?url["']?\s*:\s*["']([^"']+)["']
+ ''', webpage)
+ if not found:
+ # Try to find twitter cards info
+ found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+ if not found:
+ # We look for Open Graph info:
+ # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
+ m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+ # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+ if m_video_type is not None:
+ def check_video(vurl):
+ vpath = compat_urlparse.urlparse(vurl).path
+ vext = determine_ext(vpath)
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg')
+ found = list(filter(
+ check_video,
+ re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
+ if not found:
+ # HTML5 video
+ found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
+ if not found:
+ found = re.search(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
+ webpage)
+ if found:
+ new_url = found.group(1)
+ self.report_following_redirect(new_url)
+ return {
+ '_type': 'url',
+ 'url': new_url,
+ }
+ if not found:
+ raise ExtractorError('Unsupported URL: %s' % url)
+
+ entries = []
+ for video_url in found:
+ video_url = compat_urlparse.urljoin(url, video_url)
+ video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
+
+ # Sometimes, jwplayer extraction will result in a YouTube URL
+ if YoutubeIE.suitable(video_url):
+ entries.append(self.url_result(video_url, 'Youtube'))
+ continue
+
+ # Drop the file extension so the id is just the file's base name:
+ video_id = os.path.splitext(video_id)[0]
+
+ entries.append({
+ 'id': video_id,
+ 'url': video_url,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'age_limit': age_limit,
+ })
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ for num, e in enumerate(entries, start=1):
+ e['title'] = '%s (%d)' % (e['title'], num)
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ }
+