[walla] Add new extractor
authornet <net@h-MacBook-Pro-sl-net.local>
Wed, 1 Oct 2014 20:45:35 +0000 (23:45 +0300)
committernet <net@h-MacBook-Pro-sl-net.local>
Wed, 1 Oct 2014 20:45:35 +0000 (23:45 +0300)
youtube_dl/extractor/walla.py [new file with mode: 0644]

diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py
new file mode 100644 (file)
index 0000000..e687c3a
--- /dev/null
@@ -0,0 +1,94 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+import re
+
+from .common import InfoExtractor
+
+
+class WallaIE(InfoExtractor):
+    """Extractor for videos hosted on vod.walla.co.il.
+
+    The page URL only supplies the numeric item id. Title, media id,
+    default quality and an optional subtitle link are scraped from a
+    config document fetched from video2.walla.co.il, and the RTMP play
+    path is derived from the media id.
+    """
+
+    _VALID_URL = r'http://vod\.walla\.co\.il/\w+/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
+        'info_dict': {
+            'id': '2642630',
+            'ext': 'flv',
+            'title': 'וואן דיירקשן: ההיסטריה',
+        }
+    }
+
+    # RTMP endpoint; sent both as the stream URL and as tcUrl.
+    _RTMP_URL = 'rtmp://wafla.walla.co.il:1935/vod'
+    # Quality code used when the config declares no default quality.
+    _FALLBACK_QUALITY = '40'
+
+    def _real_extract(self, url):
+        """Return the info dict for a single Walla VOD page.
+
+        Args:
+            url: Page URL; must match ``_VALID_URL``.
+
+        Returns:
+            dict with the video id and title, the RTMP connection fields
+            ('url', 'app', 'play_path', 'tc_url', 'player_url',
+            'page_url', 'rtmp_protocol'), the container extension and a
+            'subtitles' mapping (empty when no subtitle link is found).
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        config = self._download_webpage(
+            'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id,
+            video_id, 'Downloading video config')
+
+        # The third positional argument of _html_search_regex is the field
+        # name used in messages, not the video id; defaults go by keyword.
+        media_id = self._html_search_regex(
+            r'<media_id>(\d+)</media_id>', config, 'media id')
+        title = self._html_search_regex(
+            r'<title>(.*)</title>', config, 'title')
+        quality = self._html_search_regex(
+            r'<qualities defaultType="(\d+)">', config, 'default quality',
+            default=self._FALLBACK_QUALITY)
+
+        # Play path layout: media/<series>/<session>/<media_id>-<quality>.
+        # Seven-digit media ids get a leading '0' on the series component.
+        prefix = '0' if len(media_id) == 7 else ''
+        series = prefix + media_id[0:2]
+        session = media_id[2:5]
+        play_path = 'mp4:media/%s/%s/%s-%s' % (
+            series, session, media_id, quality)
+
+        # NOTE(review): no re.DOTALL here, so this only matches when the
+        # whole subtitles element sits on one line - confirm against the feed.
+        subtitle_url = self._html_search_regex(
+            r'<subtitles.*<src>(.*)</src>.*</subtitle>', config,
+            'subtitle url', default=None)
+
+        subtitles = {}
+        if subtitle_url:
+            # Only a single track is extracted; it is stored under 'heb'.
+            subtitles['heb'] = self._download_webpage(
+                subtitle_url, video_id, 'Downloading subtitles')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': self._RTMP_URL,
+            'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
+            'page_url': url,
+            'app': 'vod',
+            'play_path': play_path,
+            'tc_url': self._RTMP_URL,
+            'rtmp_protocol': 'rtmp',
+            'ext': 'flv',
+            'subtitles': subtitles,
+        }
\ No newline at end of file