[sharesix] Add new extractor
authorNaglis Jonaitis <njonaitis@gmail.com>
Sat, 6 Sep 2014 15:59:15 +0000 (18:59 +0300)
committerNaglis Jonaitis <njonaitis@gmail.com>
Sat, 6 Sep 2014 15:59:15 +0000 (18:59 +0300)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/sharesix.py [new file with mode: 0644]

index 7adca7df918e293a57b73f9e585e366cf8780ca6..c76fb37273bcdd1f91a0816ec7f169d515064189 100644 (file)
@@ -296,6 +296,7 @@ from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
 from .shared import SharedIE
+from .sharesix import ShareSixIE
 from .sina import SinaIE
 from .slideshare import SlideshareIE
 from .slutload import SlutloadIE
diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py
new file mode 100644 (file)
index 0000000..7531e83
--- /dev/null
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+    parse_duration,
+)
+
+
+class ShareSixIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [
+        {
+            'url': 'http://sharesix.com/f/OXjQ7Y6',
+            'md5': '9e8e95d8823942815a7d7c773110cc93',
+            'info_dict': {
+                'id': 'OXjQ7Y6',
+                'ext': 'mp4',
+                'title': 'big_buck_bunny_480p_surround-fix.avi',
+                'duration': 596,
+                'width': 854,
+                'height': 480,
+            },
+        },
+        {
+            'url': 'http://sharesix.com/lfrwoxp35zdd',
+            'md5': 'dd19f1435b7cec2d7912c64beeee8185',
+            'info_dict': {
+                'id': 'lfrwoxp35zdd',
+                'ext': 'flv',
+                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
+                'duration': 65,
+                'width': 1280,
+                'height': 720,
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        fields = {
+            'method_free': 'Free'
+        }
+        post = compat_urllib_parse.urlencode(fields)
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        webpage = self._download_webpage(req, video_id,
+                                         'Downloading video page')
+
+        video_url = self._search_regex(
+            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
+        title = self._html_search_regex(
+            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
+        duration = parse_duration(
+            self._search_regex(
+                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
+                webpage,
+                'duration',
+                fatal=False
+            )
+        )
+
+        m = re.search(
+            r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
+                     <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
+            webpage
+        )
+        width = height = None
+        if m:
+            width, height = int(m.group('width')), int(m.group('height'))
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+            'width': width,
+            'height': height,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'formats': formats,
+        }