[rottentomatoes] Fix extraction
[youtube-dl] / youtube_dl / extractor / rottentomatoes.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import js_to_json
5
6
class RottenTomatoesIE(InfoExtractor):
    """Information extractor for trailer pages on www.rottentomatoes.com.

    The player configuration is read from the JavaScript object literal
    that the page passes to ``RTVideo(...)``; the ``urlHLS``, ``urlMP4`` and
    ``thumbnailImg`` entries of that object are the only ones used here.
    """

    # The numeric path component after ``/trailers/`` is captured as ``id``.
    _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        'info_dict': {
            'id': '11028566',
            'ext': 'mp4',
            'title': 'Toy Story 3',
            # Raw string: ``\.`` is not a valid escape in a normal string
            # literal and triggers a warning on recent Python versions.
            # The resulting value is unchanged.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the trailer page at *url*.

        Downloads the page, parses the ``RTVideo(...)`` argument as JSON and
        collects one format list from the HLS manifest (when ``urlHLS`` is
        set) plus a direct entry for ``urlMP4`` (when set).

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_js = self._search_regex(
            r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters')
        # ``window.location.href`` is a JavaScript expression rather than a
        # literal, so it is swapped for an empty string literal before the
        # object is handed to js_to_json (presumably because it could not be
        # converted otherwise -- confirm against js_to_json).
        params = self._parse_json(
            player_js, video_id,
            transform_source=lambda s: js_to_json(
                s.replace('window.location.href', '""')))

        formats = []

        hls_url = params.get('urlHLS')
        if hls_url:
            # fatal=False is forwarded to the manifest extraction; NOTE(review):
            # presumably this keeps an HLS failure from aborting so the MP4
            # entry below can still be used -- confirm in InfoExtractor.
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, ext='mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

        mp4_url = params.get('urlMP4')
        if mp4_url:
            formats.append({
                'url': mp4_url,
                'format_id': 'mp4',
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'thumbnail': params.get('thumbnailImg'),
        }