X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=7246ea7fc288754a529b4536e9bc8c2c055ff940;hb=c71dfccc98208be44b1f639af72a257dae34d966;hp=4b14cc5bfd99bd8a3176a2fee32ffb1f94c33a30;hpb=fa35cdad02e1c40094f01c9f8e6529da2f021563;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4b14cc5bf..7246ea7fc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -248,6 +248,60 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Honda’s New Asimo Robot Is More Human Than Ever', } + }, + # Dailymotion embed + { + 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/', + 'md5': '441aeeb82eb72c422c7f14ec533999cd', + 'info_dict': { + 'id': 'k2mm4bCdJ6CQ2i7c8o2', + 'ext': 'mp4', + 'title': 'Le Zap de Spi0n n°216 - Zapping du Web', + 'uploader': 'Spi0n', + }, + 'add_ie': ['Dailymotion'], + }, + # YouTube embed + { + 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', + 'info_dict': { + 'id': 'FXRb4ykk4S0', + 'ext': 'mp4', + 'title': 'The NBL Auction 2014', + 'uploader': 'BADMINTON England', + 'uploader_id': 'BADMINTONEvents', + 'upload_date': '20140603', + 'description': 'md5:9ef128a69f1e262a700ed83edb163a73', + }, + 'add_ie': ['Youtube'], + 'params': { + 'skip_download': True, + } + }, + # MTVSercices embed + { + 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', + 'md5': '35727f82f58c76d996fc188f9755b0d5', + 'info_dict': { + 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', + 'ext': 'mp4', + 'title': 'Review', + 'description': 'Mario\'s life in the fast lane has never looked so good.', + }, + }, + # YouTube embed via + { + 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM', + 'md5': 'c267b1ab6d736057d64babaa37e07a66', + 'info_dict': { + 'id': 'Ybd-qmqYYpA', + 'ext': 'mp4', + 'title': 'Asphalt 8: Airborne - Chinese Great Wall - Android Game Trailer', + 'uploader': 'gameloftandroid', + 'uploader_id': 'gameloftandroid', + 'upload_date': '20140321', + 'description': 'md5:9c6dca5dd75b7131ce482ccf080749d6' + } } ] @@ -333,21 +387,38 @@ class GenericIE(InfoExtractor): } def _real_extract(self, url): + if url.startswith('//'): + return { + '_type': 'url', + 'url': self.http_scheme() + url, + } + parsed_url = compat_urlparse.urlparse(url) if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto_warning' + default_search = 'fixup_error' - if default_search in ('auto', 'auto_warning'): + if default_search in ('auto', 'auto_warning', 'fixup_error'): if '/' in url: self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) - else: + elif default_search != 'fixup_error': if default_search == 'auto_warning': - self._downloader.report_warning( - 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) + if re.match(r'^(?:url|URL)$', url): + raise ExtractorError( + 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url, + expected=True) + else: + self._downloader.report_warning( + 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) + + if default_search in ('error', 'fixup_error'): + raise ExtractorError( + ('%r is not a valid URL. ' + 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube' + ) % (url, url), expected=True) else: assert ':' in default_search return self.url_result(default_search + url) @@ -455,13 +526,27 @@ class GenericIE(InfoExtractor): # Look for embedded YouTube player matches = re.findall(r'''(?x) - (?:]+?src=|embedSWF\(\s*) - (["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/ + (?: + ]+?src=| + data-video-url=| + ]+?src=| + embedSWF\(?:\s* + ) + (["\']) + (?P(?:https?:)?//(?:www\.)?youtube\.com/ (?:embed|v)/.+?) \1''', webpage) if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') for tuppl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -469,8 +554,16 @@ class GenericIE(InfoExtractor): matches = re.findall( r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) if matches: - urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') + urlrs = [self.url_result(unescapeHTML(tuppl[1])) for tuppl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -531,7 +624,7 @@ class GenericIE(InfoExtractor): return OoyalaIE._build_url_result(mobj.group('ec')) # Look for Aparat videos - mobj = re.search(r'