[slideshare] Fix extraction
author Sergey M․ <dstftw@gmail.com>
Wed, 31 Dec 2014 18:26:19 +0000 (00:26 +0600)
committer Sergey M․ <dstftw@gmail.com>
Wed, 31 Dec 2014 18:26:19 +0000 (00:26 +0600)
youtube_dl/extractor/slideshare.py

index e7d776e7bd8bd3334ff0da1203cd91d52508a6ef..9f79ff5c1b66d2bf37369a6009a914043493b407 100644 (file)
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
         slideshare_obj = self._search_regex(
-            r'var slideshare_object =  ({.*?}); var user_info =',
+            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
             webpage, 'slideshare object')
         info = json.loads(slideshare_obj)
         if info['slideshow']['type'] != 'video':
@@ -41,7 +41,7 @@ class SlideshareIE(InfoExtractor):
         ext = info['jsplayer']['video_extension']
         video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
         description = self._html_search_regex(
-            r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
+            r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
             'description', fatal=False)
 
         return {