[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / digiteka.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import int_or_none
8
9
class DigitekaIE(InfoExtractor):
    """Extractor for videos served from digiteka.net / ultimedia.com.

    Two URL shapes are accepted by ``_VALID_URL``:

    * embed ("deliver") URLs, whose type is captured as ``embed_type``
      (``generic`` or ``musique``);
    * site URLs (``default/index/video<type>/id/<id>``), whose type is
      captured as ``site_type`` (``generic`` or ``music``).
    """

    # NOTE: inside the ``[\d+a-z]`` character class the ``+`` is a literal
    # plus sign, not a quantifier.
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
        (?:
            deliver/
            (?P<embed_type>
                generic|
                musique
            )
            (?:/[^/]+)*/
            (?:
                src|
                article
            )|
            default/index/video
            (?P<site_type>
                generic|
                music
            )
            /id
        )/(?P<id>[\d+a-z]+)'''
    _TESTS = [{
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 74,
            'upload_date': '20150317',
            'timestamp': 1426604939,
            'uploader_id': '3fszv',
        },
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': 'Two - C\'est La Vie (clip)',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 233,
            'upload_date': '20150224',
            'timestamp': 1424760500,
            'uploader_id': '3rfzk',
        },
    }, {
        'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the first embedded player URL found in ``webpage``.

        Looks for an ``<iframe>`` or ``<script>`` tag whose ``src`` points at
        a ``deliver/(generic|musique)/.../(src|article)/<id>`` URL on either
        host accepted by ``_VALID_URL``.  The returned URL may be
        protocol-relative (start with ``//``).  Returns ``None`` when no
        such tag is present.
        """
        # Accept both hosts so embed detection agrees with _VALID_URL.
        mobj = re.search(
            r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
            webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _real_extract(self, url):
        """Build the info dict (or a YouTube redirect) for ``url``.

        Fetches the JSON description of the video from the ``deliver/video``
        endpoint.  When that JSON carries a ``yt_id`` the result is delegated
        to the ``Youtube`` extractor; otherwise formats are taken from the
        first playlist entry of the ``jwconf`` object.

        Raises ``KeyError``/``IndexError`` if the mandatory ``jwconf``,
        playlist sources, source ``file`` or ``title`` fields are missing.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Exactly one of the two named groups matches, depending on URL shape.
        video_type = mobj.group('embed_type') or mobj.group('site_type')
        # Site URLs say "music" while the deliver endpoint is queried
        # with "musique" (the spelling used by embed URLs).
        if video_type == 'music':
            video_type = 'musique'

        deliver_info = self._download_json(
            'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
            video_id)

        # A "yt_id" entry is handed over to the Youtube extractor.
        yt_id = deliver_info.get('yt_id')
        if yt_id:
            return self.url_result(yt_id, 'Youtube')

        jwconf = deliver_info['jwconf']

        formats = [{
            'url': source['file'],
            'format_id': source.get('label'),
        } for source in jwconf['playlist'][0]['sources']]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': deliver_info['title'],
            'thumbnail': jwconf.get('image'),
            'duration': int_or_none(deliver_info.get('duration')),
            'timestamp': int_or_none(deliver_info.get('release_time')),
            'uploader_id': deliver_info.get('owner_id'),
            'formats': formats,
        }