git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/walla.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     xpath_text,
   9     int_or_none,
  10 )
  11
  12
  13 class WallaIE(InfoExtractor):
  14     _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
  15     _TEST = {
  16         'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
  17         'info_dict': {
  18             'id': '2642630',
  19             'display_id': 'one-direction-all-for-one',
  20             'ext': 'flv',
  21             'title': 'וואן דיירקשן: ההיסטריה',
  22             'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
  23             'thumbnail': 're:^https?://.*\.jpg',
  24             'duration': 3600,
  25         },
  26         'params': {
  27             # rtmp download
  28             'skip_download': True,
  29         }
  30     }
  31
  32     _SUBTITLE_LANGS = {
  33         'עברית': 'heb',
  34     }
  35
  36     def _real_extract(self, url):
  37         mobj = re.match(self._VALID_URL, url)
  38         video_id = mobj.group('id')
  39         display_id = mobj.group('display_id')
  40
  41         video = self._download_xml(
  42             'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id,
  43             display_id)
  44
  45         item = video.find('./items/item')
  46
  47         title = xpath_text(item, './title', 'title')
  48         description = xpath_text(item, './synopsis', 'description')
  49         thumbnail = xpath_text(item, './preview_pic', 'thumbnail')
  50         duration = int_or_none(xpath_text(item, './duration', 'duration'))
  51
  52         subtitles = {}
  53         for subtitle in item.findall('./subtitles/subtitle'):
  54             lang = xpath_text(subtitle, './title')
  55             subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
  56                 'ext': 'srt',
  57                 'url': xpath_text(subtitle, './src'),
  58             }]
  59
  60         formats = []
  61         for quality in item.findall('./qualities/quality'):
  62             format_id = xpath_text(quality, './title')
  63             fmt = {
  64                 'url': 'rtmp://wafla.walla.co.il/vod',
  65                 'play_path': xpath_text(quality, './src'),
  66                 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
  67                 'page_url': url,
  68                 'ext': 'flv',
  69                 'format_id': xpath_text(quality, './title'),
  70             }
  71             m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
  72             if m:
  73                 fmt['height'] = int(m.group('height'))
  74             formats.append(fmt)
  75         self._sort_formats(formats)
  76
  77         return {
  78             'id': video_id,
  79             'display_id': display_id,
  80             'title': title,
  81             'description': description,
  82             'thumbnail': thumbnail,
  83             'duration': duration,
  84             'formats': formats,
  85             'subtitles': subtitles,
  86         }