[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / sportdeutschland.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     parse_iso8601,
9     sanitized_Request,
10 )
11
12
class SportDeutschlandIE(InfoExtractor):
    """Extractor for video pages on sportdeutschland.tv.

    URLs have the shape ``https://sportdeutschland.tv/<sport>/<id>``; both
    path components are forwarded to the permalink API to obtain the asset
    metadata and the stream location.
    """

    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
    _TESTS = [{
        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
        'info_dict': {
            'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
            'ext': 'mp4',
            'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
            'categories': ['Badminton-Deutschland'],
            'view_count': int,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': int,
            'upload_date': '20200201',
            'description': 're:.*',  # meaningless description for THIS video
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        Downloads the permalink JSON for the (sport, id) pair taken from the
        URL, then builds the format list: when the asset's video URL points
        at a SMIL document, formats come from the sibling ``.m3u8`` manifest
        plus one low-preference entry per ``<video>`` node of the SMIL;
        otherwise the video URL is used directly as the only format.

        Raises ``KeyError`` if the API response lacks the ``asset`` or
        ``section`` entries (or their required sub-keys) read below.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        sport_id = mobj.group('sport')

        api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
            sport_id, video_id)
        req = sanitized_Request(api_url, headers={
            'Accept': 'application/vnd.vidibus.v2.html+json',
            'Referer': url,
        })
        data = self._download_json(req, video_id)

        asset = data['asset']
        categories = [data['section']['title']]

        formats = []
        smil_url = asset['video']
        if '.smil' in smil_url:
            m3u8_url = smil_url.replace('.smil', '.m3u8')
            formats.extend(
                self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))

            smil_doc = self._download_xml(
                smil_url, video_id, note='Downloading SMIL metadata')
            base_url_el = smil_doc.find('./head/meta')
            # Compare with None explicitly: an ElementTree element without
            # child nodes is falsy, so a plain truthiness test would treat a
            # found <meta base="..."/> element as if it were missing.
            base_url = None
            if base_url_el is not None:
                base_url = base_url_el.get('base')

            for video_el in smil_doc.findall('./body/video'):
                src = video_el.attrib['src']
                formats.append({
                    # Spelling kept as-is (presumably meant "rtmp") so that
                    # the emitted format id does not change.
                    'format_id': 'rmtp',
                    # Fall back to the stream source when the SMIL head
                    # carries no usable base URL.
                    'url': base_url or src,
                    'play_path': src,
                    'ext': 'flv',
                    'preference': -100,
                    'format_note': 'Seems to fail at example stream',
                })
        else:
            formats.append({'url': smil_url})

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': asset['title'],
            'thumbnail': asset.get('image'),
            'description': asset.get('teaser'),
            'duration': asset.get('duration'),
            'categories': categories,
            'view_count': asset.get('views'),
            'rtmp_live': asset.get('live'),
            'timestamp': parse_iso8601(asset.get('date')),
        }