[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / rutv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     ExtractorError,
9     int_or_none
10 )
11
12
class RUTVIE(InfoExtractor):
    """Extractor for the embeddable RUTV / VGTRK web player.

    Handles player URLs on ``player.rutv.ru`` / ``player.vgtrk.com`` (and
    their ``testplayer`` variants) in three flavours, see ``_VALID_URL``:
    flash containers, ``iframe/<type>/id/`` embeds and
    ``index/iframe/cast_id/`` embeds.
    """

    IE_DESC = 'RUTV.RU'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
                        (?P<path>
                            flash\d+v/container\.swf\?id=|
                            iframe/(?P<type>swf|video|live)/id/|
                            index/iframe/cast_id/
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [
        {
            'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
            'info_dict': {
                'id': '774471',
                'ext': 'mp4',
                'title': 'Монологи на все времена',
                'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
                'duration': 2906,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
            'info_dict': {
                'id': '774016',
                'ext': 'mp4',
                'title': 'Чужой в семье Сталина',
                'description': '',
                'duration': 2539,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
            'info_dict': {
                'id': '766888',
                'ext': 'mp4',
                'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
                'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
                'duration': 279,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
            'info_dict': {
                'id': '771852',
                'ext': 'mp4',
                'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
                'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
                'duration': 3096,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
            'info_dict': {
                'id': '51499',
                'ext': 'flv',
                'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
                'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
            },
            'skip': 'Translation has finished',
        },
        {
            'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
            'info_dict': {
                'id': '21',
                'ext': 'mp4',
                'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                'is_live': True,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
            'only_matching': True,
        },
    ]

    @classmethod
    def _extract_url(cls, webpage):
        """Find an embedded player URL inside the HTML of another page.

        An ``<iframe src=...>`` pointing at the player is tried first, then
        an ``og:video`` ``<meta>`` tag pointing at the flash container.

        Returns the URL string (without the surrounding quotes), or None
        when neither form is present in ``webpage``.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

        # The closing-quote backreference must stay outside the "url" group,
        # otherwise the quote character ends up in the returned URL.
        mobj = re.search(
            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?)\2',
            webpage)
        if mobj:
            return mobj.group('url')

        return None

    def _real_extract(self, url):
        """Build the info dict for a player URL matching ``_VALID_URL``.

        Downloads the player's JSON description, takes the first entry of
        its media list and turns every advertised source into a format.

        Raises ExtractorError (expected) when the JSON reports errors either
        globally or for the selected media entry.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Only the cast_id path and the iframe "live" type denote a live
        # stream; flash containers and the "swf"/"video" iframe types are
        # regular videos.  The "type" group is None unless the iframe
        # alternative of _VALID_URL matched.
        is_live = (
            mobj.group('path').startswith('index/iframe/cast_id') or
            mobj.group('type') == 'live')

        json_data = self._download_json(
            'http://player.rutv.ru/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id),
            video_id, 'Downloading JSON')

        if json_data['errors']:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)

        playlist = json_data['data']['playlist']
        medialist = playlist['medialist']
        media = medialist[0]

        if media['errors']:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)

        view_count = playlist.get('count_views')
        priority_transport = playlist.get('priority_transport')

        # Everything except the title is optional metadata: a missing key
        # should yield None rather than abort the whole extraction.
        thumbnail = media.get('picture')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        description = media.get('anons')
        title = media['title']
        duration = int_or_none(media.get('duration'))

        formats = []

        for transport, links in media['sources'].items():
            # Sources using the transport the playlist marks as its priority
            # are ranked above all others.
            preference = -1 if priority_transport == transport else -2
            for quality, video_url in links.items():
                if transport == 'rtmp':
                    # Split the RTMP URL into connection URL (incl. app) and
                    # play path; skip entries that do not have that shape.
                    rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', video_url)
                    if not rtmp:
                        continue
                    fmt = {
                        'url': rtmp.group('url'),
                        'play_path': rtmp.group('playpath'),
                        'app': rtmp.group('app'),
                        'page_url': 'http://player.rutv.ru',
                        'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
                        'rtmp_live': True,
                        'ext': 'flv',
                        'vbr': int(quality),
                        'preference': preference,
                    }
                elif transport == 'm3u8':
                    # The inherited m3u8 helper returns ready-made formats,
                    # so the width/height/format_id update below is skipped.
                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', preference=preference, m3u8_id='hls'))
                    continue
                else:
                    fmt = {
                        'url': video_url
                    }
                fmt.update({
                    'width': width,
                    'height': height,
                    'format_id': '%s-%s' % (transport, quality),
                })
                formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'duration': duration,
            'formats': formats,
            'is_live': is_live,
        }