Merge pull request #7691 from ryandesign/use-PYTHON-env-var
[youtube-dl] / youtube_dl / extractor / mpora.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import int_or_none
5
6
class MporaIE(InfoExtractor):
    """Information extractor for mpora.com / mpora.de video pages.

    The player configuration is embedded in the page as a JSON object
    passed to an ``FM.Player`` or ``FM.Kaltura.Player`` constructor; all
    metadata and media URLs are read from that object.
    """

    _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
    IE_NAME = 'MPORA'

    _TEST = {
        'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
        'md5': 'a7a228473eedd3be741397cf452932eb',
        'info_dict': {
            'id': 'AAdo8okx4wiz',
            'ext': 'mp4',
            'title': 'Katy Curd -  Winter in the Forest',
            'duration': 416,
            'uploader': 'Peter Newman Media',
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        ``title`` and the encodings table are treated as mandatory (a missing
        key raises); ``uploader``, ``duration`` and ``thumbnail`` are optional
        and come back as ``None`` when the player data does not provide them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Two player constructor variants are recognised; the first capture
        # group of whichever pattern matches holds the JSON configuration.
        data_json = self._search_regex(
            [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
             r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
            webpage, 'json')
        data = self._parse_json(data_json, video_id)

        video = data['video']
        info_overlay = data['info_overlay']
        encodings = video['encodings']

        title = info_overlay['title']
        uploader = info_overlay.get('username')
        # The raw value is divided by 1000 (presumably milliseconds to
        # seconds); int_or_none keeps a missing/null duration from aborting
        # the whole extraction.
        duration = int_or_none(video.get('duration'), scale=1000)
        # The poster is taken from the 'sd' encoding only; tolerate pages
        # that lack that encoding or its poster instead of raising KeyError.
        thumbnail = (encodings.get('sd') or {}).get('poster')

        formats = []
        for encoding_id, edata in encodings.items():
            for src in edata['sources']:
                # A trailing "_<digits>.<ext>" in the source URL is used as
                # the width; None when the URL does not follow that scheme.
                width_str = self._search_regex(
                    r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
                    'width', default=None)
                # Subtype part of the source's MIME-like 'type' value,
                # i.e. everything after the first '/'.
                vcodec = src['type'].partition('/')[2]

                formats.append({
                    'format_id': encoding_id + '-' + vcodec,
                    'url': src['src'],
                    'vcodec': vcodec,
                    'width': int_or_none(width_str),
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'uploader': uploader,
            'duration': duration,
            'thumbnail': thumbnail,
        }