X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=32e41d13e1a73db287440e9ddbe1f570777b97cd;hb=0eb5c1c62a2535ceaf10202e3feba8e556065f15;hp=759691365b3258f4d6fce045e4877c5798313341;hpb=b26733ba7f376f8c9285ac7928534286622bbc7c;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 759691365..32e41d13e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -34,6 +34,7 @@ from .brightcove import BrightcoveIE from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE +from .tvc import TVCIE from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE from .condenast import CondeNastIE @@ -41,6 +42,12 @@ from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE from .bliptv import BlipTVIE from .svt import SVTIE +from .pornhub import PornHubIE +from .xhamster import XHamsterEmbedIE +from .vimeo import VimeoIE +from .dailymotion import DailymotionCloudIE +from .onionstudios import OnionStudiosIE +from .snagfilms import SnagFilmsEmbedIE class GenericIE(InfoExtractor): @@ -291,6 +298,15 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # TVC embed + { + 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/', + 'info_dict': { + 'id': '55304', + 'ext': 'mp4', + 'title': 'Дошкольное воспитание', + }, + }, # SportBox embed { 'url': 'http://www.vestifinance.ru/articles/25753', @@ -322,6 +338,15 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # XHamster embed + { + 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8', + 'info_dict': { + 'id': 'showthread', + 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )', + }, + 'playlist_mincount': 7, + }, # Embedded TED video { 'url': 'http://en.support.wordpress.com/videos/ted-talks/', @@ -801,6 +826,50 @@ class GenericIE(InfoExtractor): 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.', 'uploader': 'Rogers Sportsnet', }, + }, + # Dailymotion Cloud video + { + 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', + 'md5': '49444254273501a64675a7e68c502681', + 'info_dict': { + 'id': '5585de919473990de4bee11b', + 'ext': 'mp4', + 'title': 'Le débat', + 'thumbnail': 're:^https?://.*\.jpe?g$', + } + }, + # OnionStudios embed + { + 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', + 'info_dict': { + 'id': '2855', + 'ext': 'mp4', + 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', + 'thumbnail': 're:^https?://.*\.jpe?g$', + 'uploader': 'ClickHole', + 'uploader_id': 'clickhole', + } + }, + # SnagFilms embed + { + 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html', + 'info_dict': { + 'id': '74849a00-85a9-11e1-9660-123139220831', + 'ext': 'mp4', + 'title': '#whilewewatch', + } + }, + # AdobeTVVideo embed + { + 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners', + 'md5': '43662b577c018ad707a63766462b1e87', + 'info_dict': { + 'id': '2456', + 'ext': 'mp4', + 'title': 'New experience with Acrobat DC', + 'description': 'New experience with Acrobat DC', + 'duration': 248.667, + }, } ] @@ -968,7 +1037,9 @@ class GenericIE(InfoExtractor): } if not self._downloader.params.get('test', False) and not is_intentional: - self._downloader.report_warning('Falling back on generic information extractor.') + force = self._downloader.params.get('force_generic_extractor', False) + self._downloader.report_warning( + '%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) if not full_response: request = compat_urllib_request.Request(url) @@ -1073,23 +1144,14 @@ class GenericIE(InfoExtractor): # Look for embedded rtl.nl player matches = re.findall( - r']+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', webpage) if matches: return _playlist_from_matches(matches, ie='RtlNl') - # Look for embedded (iframe) Vimeo player - mobj = re.search( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) - if mobj: - player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'Referer': url}) - return self.url_result(surl) - # Look for embedded (swf embed) Vimeo player - mobj = re.search( - r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) - if mobj: - return self.url_result(mobj.group(1)) + vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) + if vimeo_url is not None: + return self.url_result(vimeo_url) # Look for embedded YouTube player matches = re.findall(r'''(?x) @@ -1301,11 +1363,32 @@ class GenericIE(InfoExtractor): if rutv_url: return self.url_result(rutv_url, 'RUTV') + # Look for embedded TVC player + tvc_url = TVCIE._extract_url(webpage) + if tvc_url: + return self.url_result(tvc_url, 'TVC') + # Look for embedded SportBox player sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + # Look for embedded PornHub player + pornhub_url = PornHubIE._extract_url(webpage) + if pornhub_url: + return self.url_result(pornhub_url, 'PornHub') + + # Look for embedded XHamster player + xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) + if xhamster_urls: + return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed') + + # Look for embedded Tvigle player + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Tvigle') + # Look for embedded TED player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) @@ -1467,6 +1550,30 @@ class GenericIE(InfoExtractor): if senate_isvp_url: return self.url_result(senate_isvp_url, 'SenateISVP') + # Look for Dailymotion Cloud videos + dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage) + if dmcloud_url: + return self.url_result(dmcloud_url, 'DailymotionCloud') + + # Look for OnionStudios embeds + onionstudios_url = OnionStudiosIE._extract_url(webpage) + if onionstudios_url: + return self.url_result(onionstudios_url) + + # Look for SnagFilms embeds + snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) + if snagfilms_url: + return self.url_result(snagfilms_url) + + # Look for AdobeTVVideo embeds + mobj = re.search( + r']+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', + webpage) + if mobj is not None: + return self.url_result( + self._proto_relative_url(unescapeHTML(mobj.group(1))), + 'AdobeTVVideo') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True