X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=ea60d4a96cb86a6d22406be7f92b6d1fdcb8eceb;hb=f7c272d4fa9cdb6212e8716e813f5d55a16137a2;hp=07939b196daa6ef51d3971bbc4d40ea87343c1ca;hpb=756f574e4e7160ca5b39c6e18ec5168beb4a8eb1;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 07939b196..ea60d4a96 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -43,8 +43,11 @@ from .senateisvp import SenateISVPIE from .bliptv import BlipTVIE from .svt import SVTIE from .pornhub import PornHubIE +from .xhamster import XHamsterEmbedIE from .vimeo import VimeoIE from .dailymotion import DailymotionCloudIE +from .onionstudios import OnionStudiosIE +from .snagfilms import SnagFilmsEmbedIE class GenericIE(InfoExtractor): @@ -335,6 +338,15 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # XHamster embed + { + 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8', + 'info_dict': { + 'id': 'showthread', + 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )', + }, + 'playlist_mincount': 7, + }, # Embedded TED video { 'url': 'http://en.support.wordpress.com/videos/ted-talks/', @@ -657,6 +669,18 @@ class GenericIE(InfoExtractor): 'title': 'John Carlson Postgame 2/25/15', }, }, + # Kaltura embed (different embed code) + { + 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014', + 'info_dict': { + 'id': '1_a52wc67y', + 'ext': 'flv', + 'upload_date': '20150127', + 'uploader_id': 'PremierMedia', + 'timestamp': int, + 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', + }, + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -825,6 +849,39 @@ class GenericIE(InfoExtractor): 'title': 'Le débat', 'thumbnail': 're:^https?://.*\.jpe?g$', } + }, + # OnionStudios embed + { + 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', + 'info_dict': { + 'id': '2855', + 'ext': 'mp4', + 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', + 'thumbnail': 're:^https?://.*\.jpe?g$', + 'uploader': 'ClickHole', + 'uploader_id': 'clickhole', + } + }, + # SnagFilms embed + { + 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html', + 'info_dict': { + 'id': '74849a00-85a9-11e1-9660-123139220831', + 'ext': 'mp4', + 'title': '#whilewewatch', + } + }, + # AdobeTVVideo embed + { + 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners', + 'md5': '43662b577c018ad707a63766462b1e87', + 'info_dict': { + 'id': '2456', + 'ext': 'mp4', + 'title': 'New experience with Acrobat DC', + 'description': 'New experience with Acrobat DC', + 'duration': 248.667, + }, } ] @@ -992,7 +1049,9 @@ class GenericIE(InfoExtractor): } if not self._downloader.params.get('test', False) and not is_intentional: - self._downloader.report_warning('Falling back on generic information extractor.') + force = self._downloader.params.get('force_generic_extractor', False) + self._downloader.report_warning( + '%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) if not full_response: request = compat_urllib_request.Request(url) @@ -1331,6 +1390,11 @@ class GenericIE(InfoExtractor): if pornhub_url: return self.url_result(pornhub_url, 'PornHub') + # Look for embedded XHamster player + xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) + if xhamster_urls: + return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed') + # Look for embedded Tvigle player mobj = re.search( r']+?src=(["\'])(?P(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) @@ -1440,8 +1504,8 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - mobj = re.search( - r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P[^']+)',.*?'entry_id'\s*:\s*'(?P[^']+)',", webpage) + mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P[^']+)',.*?'entry_id'\s*:\s*'(?P[^']+)',", webpage) or + re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P[^\2]+?)\2', webpage)) if mobj is not None: return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura') @@ -1503,6 +1567,25 @@ class GenericIE(InfoExtractor): if dmcloud_url: return self.url_result(dmcloud_url, 'DailymotionCloud') + # Look for OnionStudios embeds + onionstudios_url = OnionStudiosIE._extract_url(webpage) + if onionstudios_url: + return self.url_result(onionstudios_url) + + # Look for SnagFilms embeds + snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) + if snagfilms_url: + return self.url_result(snagfilms_url) + + # Look for AdobeTVVideo embeds + mobj = re.search( + r']+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', + webpage) + if mobj is not None: + return self.url_result( + self._proto_relative_url(unescapeHTML(mobj.group(1))), + 'AdobeTVVideo') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True