X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fslideshare.py;h=0b717a1e42b8dd2c3d8a88d602f001876cf99e03;hb=c3124c3085e6a9a83ee31ace3a7d528a324c42da;hp=afc3001b57f404486e2fa3a9c911bf4eec663b46;hpb=94518f208799dde250f5fd8dd0ce77dd7ea05b1c;p=youtube-dl diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index afc3001b5..0b717a1e4 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -1,9 +1,13 @@ +from __future__ import unicode_literals + import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( ExtractorError, ) @@ -12,11 +16,12 @@ class SlideshareIE(InfoExtractor): _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' _TEST = { - u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', - u'file': u'25665706.mp4', - u'info_dict': { - u'title': u'Managing Scale and Complexity', - u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix', + 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', + 'info_dict': { + 'id': '25665706', + 'ext': 'mp4', + 'title': 'Managing Scale and Complexity', + 'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). 
This presentation was given by Adrian Cockcroft from Netflix.', }, } @@ -25,16 +30,19 @@ class SlideshareIE(InfoExtractor): page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) slideshare_obj = self._search_regex( - r'var slideshare_object = ({.*?}); var user_info =', - webpage, u'slideshare object') + r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);', + webpage, 'slideshare object') info = json.loads(slideshare_obj) - if info['slideshow']['type'] != u'video': - raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) + if info['slideshow']['type'] != 'video': + raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) doc = info['doc'] bucket = info['jsplayer']['video_bucket'] ext = info['jsplayer']['video_extension'] video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) + description = self._html_search_regex( + r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage, + 'description', fatal=False) return { '_type': 'video', @@ -43,5 +51,5 @@ class SlideshareIE(InfoExtractor): 'ext': ext, 'url': video_url, 'thumbnail': info['slideshow']['pin_image_url'], - 'description': self._og_search_description(webpage), + 'description': description, }