Merge branch 'douyutv' of https://github.com/bonfy/youtube-dl into bonfy-douyutv
[youtube-dl] / youtube_dl / extractor / playwire.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     xpath_text,
8     float_or_none,
9     int_or_none,
10 )
11
12
class PlaywireIE(InfoExtractor):
    """Extractor for videos addressed through config/cdn.playwire.com URLs.

    The publisher id and the video id are read from the URL.  Whatever URL
    shape was matched, the metadata is always fetched from the
    ``zeus.json`` player configuration on ``config.playwire.com``.
    """

    _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
        'md5': 'e6398701e3595888125729eaa2329ed9',
        'info_dict': {
            'id': '3353705',
            'ext': 'mp4',
            'title': 'S04_RM_UCL_Rus',
            # Raw string: '\.' is not a valid escape in a plain literal and
            # triggers an invalid-escape-sequence warning on modern Python.
            'thumbnail': r're:^http://.*\.png$',
            'duration': 145.94,
        },
    }, {
        'url': 'http://cdn.playwire.com/11625/embed/85228.html',
        'only_matching': True,
    }, {
        'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
        'only_matching': True,
    }, {
        'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the Playwire video at *url*.

        Downloads the ``zeus.json`` player config, then the f4m manifest it
        references, and builds one format per ``<media>`` element that has
        a ``url`` attribute.

        The result contains the keys ``id``, ``title``, ``thumbnail``,
        ``duration`` and ``formats``.  A missing ``settings.title``,
        ``content`` or ``content.media.f4m`` entry in the config raises
        ``KeyError``; ``thumbnail`` and ``duration`` are optional.
        """
        mobj = re.match(self._VALID_URL, url)
        publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')

        player = self._download_json(
            'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id),
            video_id)

        title = player['settings']['title']
        # NOTE(review): 1000 is passed as float_or_none's second positional
        # argument, presumably a divisor turning milliseconds into seconds
        # (the test above expects 145.94) -- confirm against utils.
        duration = float_or_none(player.get('duration'), 1000)

        content = player['content']
        thumbnail = content.get('poster')
        src = content['media']['f4m']

        f4m = self._download_xml(src, video_id)
        # Media URLs in the manifest are joined onto this base URL below.
        base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
        formats = []
        for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
            media_url = media.get('url')
            if not media_url:
                # Nothing to download for an entry without a url attribute.
                continue
            tbr = int_or_none(media.get('bitrate'))
            width = int_or_none(media.get('width'))
            height = int_or_none(media.get('height'))
            f = {
                # Reuse the value already fetched and checked above instead
                # of looking the attribute up a second time.
                'url': '%s/%s' % (base_url, media_url),
                'tbr': tbr,
                'width': width,
                'height': height,
            }
            if not (tbr or width or height):
                # No bitrate or dimensions to rank by: fall back to the
                # '-hd.' marker in the file name as a quality hint.
                f['quality'] = 1 if '-hd.' in media_url else 0
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }