X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=a62287e50d34f36840aab34ad3cb3988f85b0193;hb=f7e6f7fa2374864ba28fbe6840a0462b7c1033db;hp=5c03fddc6a2ee548a6617a3aea9ba161f6b3777d;hpb=a5158f38a31e863a39de8f66c26469a5d4469280;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c03fddc6..a62287e50 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -8,7 +8,6 @@ import re from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_request, compat_urlparse, @@ -37,6 +36,7 @@ from .rutv import RUTVIE from .tvc import TVCIE from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE +from .myvi import MyviIE from .condenast import CondeNastIE from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE @@ -46,6 +46,8 @@ from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE from .vimeo import VimeoIE from .dailymotion import DailymotionCloudIE +from .onionstudios import OnionStudiosIE +from .snagfilms import SnagFilmsEmbedIE class GenericIE(InfoExtractor): @@ -336,6 +338,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # Myvi.ru embed + { + 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1', + 'info_dict': { + 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e', + 'ext': 'mp4', + 'title': 'Ужастики, русский трейлер (2015)', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 153, + } + }, # XHamster embed { 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8', @@ -667,6 +680,18 @@ class GenericIE(InfoExtractor): 'title': 'John Carlson Postgame 2/25/15', }, }, + # Kaltura embed (different embed code) + { + 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014', + 'info_dict': { + 'id': '1_a52wc67y', + 'ext': 'flv', + 'upload_date': '20150127', + 'uploader_id': 'PremierMedia', + 'timestamp': int, + 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', + }, + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -836,6 +861,27 @@ class GenericIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpe?g$', } }, + # OnionStudios embed + { + 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537', + 'info_dict': { + 'id': '2855', + 'ext': 'mp4', + 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You', + 'thumbnail': 're:^https?://.*\.jpe?g$', + 'uploader': 'ClickHole', + 'uploader_id': 'clickhole', + } + }, + # SnagFilms embed + { + 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html', + 'info_dict': { + 'id': '74849a00-85a9-11e1-9660-123139220831', + 'ext': 'mp4', + 'title': '#whilewewatch', + } + }, # AdobeTVVideo embed { 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners', @@ -1014,7 +1060,9 @@ class GenericIE(InfoExtractor): } if not self._downloader.params.get('test', False) and not is_intentional: - self._downloader.report_warning('Falling back on generic information extractor.') + force = self._downloader.params.get('force_generic_extractor', False) + self._downloader.report_warning( + '%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) if not full_response: request = compat_urllib_request.Request(url) @@ -1066,7 +1114,7 @@ class GenericIE(InfoExtractor): # Sometimes embedded video player is hidden behind percent encoding # (e.g. https://github.com/rg3/youtube-dl/issues/2448) # Unescaping the whole page allows to handle those cases in a generic way - webpage = compat_urllib_parse.unquote(webpage) + webpage = compat_urllib_parse_unquote(webpage) # it's tempting to parse this further, but you would # have to take into account all the variations like @@ -1320,7 +1368,7 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url')) mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P[^&]+)', webpage) if mobj is not None: - return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) + return self.url_result(compat_urllib_parse_unquote(mobj.group('url'))) # Look for funnyordie embed matches = re.findall(r']+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) @@ -1388,6 +1436,11 @@ class GenericIE(InfoExtractor): if smotri_url: return self.url_result(smotri_url, 'Smotri') + # Look for embedded Myvi.ru player + myvi_url = MyviIE._extract_url(webpage) + if myvi_url: + return self.url_result(myvi_url) + # Look for embeded soundcloud player mobj = re.search( r'https?://(?:w\.)?soundcloud\.com/player[^"]+)"', @@ -1467,8 +1520,8 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - mobj = re.search( - r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P[^']+)',.*?'entry_id'\s*:\s*'(?P[^']+)',", webpage) + mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P[^']+)',.*?'entry_id'\s*:\s*'(?P[^']+)',", webpage) or + re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P[^\2]+?)\2', webpage)) if mobj is not None: return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura') @@ -1530,6 +1583,16 @@ class GenericIE(InfoExtractor): if dmcloud_url: return self.url_result(dmcloud_url, 'DailymotionCloud') + # Look for OnionStudios embeds + onionstudios_url = OnionStudiosIE._extract_url(webpage) + if onionstudios_url: + return self.url_result(onionstudios_url) + + # Look for SnagFilms embeds + snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) + if snagfilms_url: + return self.url_result(snagfilms_url) + # Look for AdobeTVVideo embeds mobj = re.search( r']+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', @@ -1618,7 +1681,7 @@ class GenericIE(InfoExtractor): entries = [] for video_url in found: video_url = compat_urlparse.urljoin(url, video_url) - video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) + video_id = compat_urllib_parse_unquote(os.path.basename(video_url)) # Sometimes, jwplayer extraction will result in a YouTube URL if YoutubeIE.suitable(video_url):