[internazionale] Add extractor
[youtube-dl] / youtube_dl / extractor / internazionale.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class InternazionaleIE(InfoExtractor):
    """Extractor for video pages under internazionale.it/video/.

    The page markup carries ``data-job-id`` and ``data-video-path``
    attributes; combined, they name an HLS (.m3u8) and a DASH (.mpd)
    manifest on video.internazionale.it from which the formats are built.
    """

    # Intermediate path segments (e.g. the date in the test URL below) are
    # skipped so that the captured id is only the final slug, free of
    # slashes, query string and fragment.
    _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
        # NOTE(review): re-verify this checksum if the format selected by
        # default changes.
        'md5': '11b54a3d3333e455c00684e50a65c58e',
        'info_dict': {
            'id': '265968',
            'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
            'ext': 'mp4',
            'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
            'title': 'Richard Linklater racconta una scena di Boyhood',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single internazionale.it video page.

        :param url: page URL matching ``_VALID_URL``.
        :returns: dict with ``id`` (the page's data-job-id), ``display_id``
            (the URL slug), ``title``, ``thumbnail``, ``description`` and
            the sorted ``formats`` list.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Accept either quote style around the attribute value.
        attr_re = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'

        video_id = self._html_search_regex(
            attr_re % 'job-id', webpage, 'data-job-id', group='value')
        video_path = self._html_search_regex(
            attr_re % 'video-path', webpage, 'data-video-path',
            group='value')

        manifest_base = 'https://video.internazionale.it/%s/%s.' % (
            video_path, video_id)

        # fatal=False: a failure to fetch one manifest should not discard
        # the formats obtained from the other one.
        formats = self._extract_m3u8_formats(
            manifest_base + 'm3u8', display_id, 'mp4',
            m3u8_id='hls', fatal=False)
        formats.extend(self._extract_mpd_formats(
            manifest_base + 'mpd', display_id,
            mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'formats': formats,
        }