[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / walla.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     xpath_text,
9     int_or_none,
10 )
11
12
class WallaIE(InfoExtractor):
    """Information extractor for videos served from vod.walla.co.il.

    The numeric id in the page URL is used to download an XML playlist
    from video2.walla.co.il; metadata, subtitles and the RTMP formats are
    all read from the first ``items/item`` element of that document.
    """

    _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
    _TEST = {
        'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
        'info_dict': {
            'id': '2642630',
            'display_id': 'one-direction-all-for-one',
            'ext': 'flv',
            'title': 'וואן דיירקשן: ההיסטריה',
            'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3600,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    # Maps subtitle track titles found in the playlist XML to the language
    # keys used in the resulting ``subtitles`` dict. Titles that are not
    # listed here are used as the key unchanged.
    _SUBTITLE_LANGS = {
        'עברית': 'heb',
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Returns a dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``duration``, ``formats`` and
        ``subtitles``.

        Raises ExtractorError when the playlist XML contains no
        ``items/item`` element or the item has no title.
        """
        # Function-scope import: ExtractorError is not part of this
        # module's top-level ``..utils`` import list.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        video = self._download_xml(
            'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id,
            display_id)

        item = video.find('./items/item')
        if item is None:
            # Fail with an extractor error instead of an AttributeError
            # on None further down.
            raise ExtractorError('Unable to find video item in playlist XML')

        # The title is mandatory, so a missing element is fatal here.
        title = xpath_text(item, './title', 'title', fatal=True)
        description = xpath_text(item, './synopsis', 'description')
        thumbnail = xpath_text(item, './preview_pic', 'thumbnail')
        duration = int_or_none(xpath_text(item, './duration', 'duration'))

        subtitles = {}
        for subtitle in item.findall('./subtitles/subtitle'):
            subtitle_url = xpath_text(subtitle, './src')
            if not subtitle_url:
                # A track without a source cannot be downloaded.
                continue
            lang = xpath_text(subtitle, './title')
            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
                'ext': 'srt',
                'url': subtitle_url,
            }]

        formats = []
        for quality in item.findall('./qualities/quality'):
            play_path = xpath_text(quality, './src')
            if not play_path:
                # Without a play path the RTMP stream cannot be addressed.
                continue
            format_id = xpath_text(quality, './title')
            fmt = {
                'url': 'rtmp://wafla.walla.co.il/vod',
                'play_path': play_path,
                'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
                'page_url': url,
                'ext': 'flv',
                'format_id': format_id,
            }
            # Quality titles such as "720p" carry the frame height; the
            # title may be absent, in which case there is nothing to parse.
            if format_id:
                m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
                if m:
                    fmt['height'] = int(m.group('height'))
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }