Add an extractor for Slideshare (closes #1400)

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Tue, 10 Sep 2013 09:19:58 +0000 (11:19 +0200)

committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>

Tue, 10 Sep 2013 09:19:58 +0000 (11:19 +0200)
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Tue, 10 Sep 2013 09:19:58 +0000 (11:19 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Tue, 10 Sep 2013 09:19:58 +0000 (11:19 +0200)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index fbe0b8cb7ad1693230fa87ccb5121e1047bf0652..bedb208fbecf771b25f40a7868cd5c2dd62a33d1 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -75,6 +75,7 @@ from .roxwel import RoxwelIE
  from .rtlnow import RTLnowIE
  from .sina import SinaIE
  from .slashdot import SlashdotIE
+from .slideshare import SlideshareIE
  from .sohu import SohuIE
  from .soundcloud import SoundcloudIE, SoundcloudSetIE
  from .spiegel import SpiegelIE
diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py

new file mode 100644 (file)

index 0000000..afc3001
--- /dev/null
+++ b/youtube_dl/extractor/slideshare.py
@@ -0,0 +1,47 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    ExtractorError,
+)
+
+
+class SlideshareIE(InfoExtractor):
+    """Extractor for slideshare.net pages whose slideshow is a video."""
+    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
+    _TEST = {
+        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
+        u'file': u'25665706.mp4',
+        u'info_dict': {
+            u'title': u'Managing Scale and Complexity',
+            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for ``url``; raise ExtractorError if it is not a video."""
+        slug = re.match(self._VALID_URL, url).group('title')
+        webpage = self._download_webpage(url, slug)
+        info = json.loads(self._search_regex(
+            r'var slideshare_object =  ({.*?}); var user_info =',
+            webpage, u'slideshare object'))
+        slideshow = info['slideshow']
+        if slideshow['type'] != u'video':
+            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % slideshow['type'], expected=True)
+
+        doc = info['doc']
+        player = info['jsplayer']
+        bucket, ext = player['video_bucket'], player['video_extension']
+        # The media URL is "<doc>-SD.<ext>" resolved against the bucket URL.
+        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
+        return {
+            '_type': 'video',
+            'id': slideshow['id'],
+            'title': slideshow['title'],
+            'ext': ext,
+            'url': video_url,
+            'thumbnail': slideshow['pin_image_url'],
+            'description': self._og_search_description(webpage),
+        }
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Tue, 10 Sep 2013 09:19:58 +0000 (11:19 +0200)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Tue, 10 Sep 2013 09:19:58 +0000 (11:19 +0200)
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/slideshare.py	[new file with mode: 0644]	patch \| blob