X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=9057a6beb97a0d0cc2fca33d1d0ccc30c07b8101;hb=6b445558ffe292c11327862ab510b1abf4e36616;hp=c16da70f1d50d3fd5729071fe1599c1b25da7885;hpb=c59c3c84ede823e5c97f695ae904545c615e4ded;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c16da70f1..9057a6beb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -28,6 +28,7 @@ from .brightcove import BrightcoveIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .smotri import SmotriIE +from .condenast import CondeNastIE class GenericIE(InfoExtractor): @@ -608,13 +609,13 @@ class GenericIE(InfoExtractor): if mobj: player_url = unescapeHTML(mobj.group('url')) surl = smuggle_url(player_url, {'Referer': url}) - return self.url_result(surl, 'Vimeo') + return self.url_result(surl) # Look for embedded (swf embed) Vimeo player mobj = re.search( - r']+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) + r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) if mobj: - return self.url_result(mobj.group(1), 'Vimeo') + return self.url_result(mobj.group(1)) # Look for embedded YouTube player matches = re.findall(r'''(?x) @@ -653,15 +654,17 @@ class GenericIE(InfoExtractor): match = re.search( r']+?src=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) if match: + embed_url = self._proto_relative_url( + unescapeHTML(match.group('url'))) return { '_type': 'url_transparent', - 'url': unescapeHTML(match.group('url')), + 'url': embed_url, 'ie_key': 'Wistia', 'uploader': video_uploader, 'title': video_title, 'id': video_id, } - match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) + match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) if match: return { '_type': 'url_transparent', @@ -847,47 +850,57 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'MLB') + mobj = re.search( + r']+?src=(["\'])(?P%s)\1' % CondeNastIE.EMBED_URL, + webpage) + if mobj is not None: + return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') + + def check_video(vurl): + vpath = compat_urlparse.urlparse(vurl).path + vext = determine_ext(vpath) + return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') + + def filter_video(urls): + return list(filter(check_video, urls)) + # Start with something easy: JW Player in SWFObject - found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) + found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)) if not found: # Look for gorilla-vid style embedding - found = re.findall(r'''(?sx) + found = filter_video(re.findall(r'''(?sx) (?: jw_plugins| JWPlayerOptions| jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup ) - .*?file\s*:\s*["\'](.*?)["\']''', webpage) + .*?file\s*:\s*["\'](.*?)["\']''', webpage)) if not found: # Broaden the search a little bit - found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) + found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) if not found: # Broaden the findall a little bit: JWPlayer JS loader - found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) + found = filter_video(re.findall( + r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) if not found: # Flow player - found = re.findall(r'''(?xs) + found = filter_video(re.findall(r'''(?xs) flowplayer\("[^"]+",\s* \{[^}]+?\}\s*, \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s* ["']?url["']?\s*:\s*["']([^"']+)["'] - ''', webpage) + ''', webpage)) if not found: # Try to find twitter cards info - found = re.findall(r'.*?]+)? src="([^"]+)"', webpage)