[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / philharmoniedeparis.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import (
7     try_get,
8     urljoin,
9 )
10
11
class PhilharmonieDeParisIE(InfoExtractor):
    """Extractor for Philharmonie de Paris concert and document pages.

    The video id is the numeric id matched by ``_VALID_URL``; all metadata
    is read from the player configuration endpoint under ``_LIVE_URL``.
    """
    IE_DESC = 'Philharmonie de Paris'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)|
                            pad\.philharmoniedeparis\.fr/doc/CIMU/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
        'md5': 'a0a4b195f544645073631cbec166a2c2',
        'info_dict': {
            'id': '1086697',
            'ext': 'mp4',
            'title': 'Jazz à la Villette : Knower',
        },
    }, {
        'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
        'info_dict': {
            'id': '1032066',
            'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
        'only_matching': True,
    }, {
        'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
        'only_matching': True,
    }, {
        'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
        'only_matching': True,
    }, {
        'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
        'only_matching': True,
    }]
    # Base URL used both for the config endpoint and to resolve relative
    # media/thumbnail paths found in the config.
    _LIVE_URL = 'https://live.philharmoniedeparis.fr'

    def _real_extract(self, url):
        """Return a single-video info dict or a playlist of chapters.

        If the top-level config yields a title and at least one format it is
        returned as a single video; otherwise each usable item of
        ``config['chapters']`` becomes one playlist entry.
        """
        video_id = self._match_id(url)

        config = self._download_json(
            '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
                'id': video_id,
                'lang': 'fr-FR',
            })

        def extract_entry(source):
            """Build a ``{'title', 'formats'}`` dict from a config node.

            Returns None when ``source`` is not a dict, has no title, has no
            ``files`` dict, or when no formats could be extracted.
            """
            if not isinstance(source, dict):
                return
            title = source.get('title')
            if not title:
                return
            files = source.get('files')
            if not isinstance(files, dict):
                return
            # 'mobile' and 'desktop' may reference the same manifest; track
            # the URLs already processed so each is downloaded only once.
            format_urls = set()
            formats = []
            for format_id in ('mobile', 'desktop'):
                format_url = try_get(
                    files, lambda x: x[format_id]['file'], compat_str)
                if not format_url or format_url in format_urls:
                    continue
                format_urls.add(format_url)
                m3u8_url = urljoin(self._LIVE_URL, format_url)
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            if not formats:
                return
            self._sort_formats(formats)
            return {
                'title': title,
                'formats': formats,
            }

        thumbnail = urljoin(self._LIVE_URL, config.get('image'))

        # Single-video case: the top-level config itself is playable.
        info = extract_entry(config)
        if info:
            info.update({
                'id': video_id,
                'thumbnail': thumbnail,
            })
            return info

        # Playlist case: one entry per chapter.
        entries = []
        for num, chapter in enumerate(config['chapters'], start=1):
            entry = extract_entry(chapter)
            # extract_entry returns None for chapters without a title or
            # without playable formats; skip those instead of crashing on
            # the item assignment below. The chapter's original position is
            # kept in the id so ids do not shift when a chapter is skipped.
            if entry is None:
                continue
            entry['id'] = '%s-%d' % (video_id, num)
            entries.append(entry)

        return self.playlist_result(entries, video_id, config.get('title'))