[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / byutv.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     determine_ext,
8     merge_dicts,
9     parse_duration,
10     url_or_none,
11 )
12
13
class BYUtvIE(InfoExtractor):
    """Extractor for byutv.org ``/watch/<id>`` and ``/player/<id>`` pages.

    Video metadata is fetched from the BYUtv catalog API.  Content that the
    API reports under ``ooyalaVOD`` is delegated to the Ooyala extractor;
    any other entry exposing a ``videoUrl`` is turned into formats directly.
    """

    _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
    _TESTS = [{
        # ooyalaVOD
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
        'info_dict': {
            'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
            'display_id': 'studio-c-season-5-episode-5',
            'ext': 'mp4',
            'title': 'Season 5 Episode 5',
            'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
            'thumbnail': r're:^https?://.*',
            'duration': 1486.486,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        # dvr
        'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
        'info_dict': {
            'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
            'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
            'ext': 'mp4',
            'title': 'Pacific vs. BYU (4/12/19)',
            'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
            'duration': 11645,
        },
        'params': {
            'skip_download': True
        },
    }, {
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
        'only_matching': True,
    }, {
        'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a BYUtv watch/player URL.

        Returns either a ``url_transparent`` result pointing at the Ooyala
        extractor (when the API response has an ``ooyalaVOD`` entry) or a
        regular result whose ``formats`` are collected from every API entry
        that carries a usable ``videoUrl``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # The slug is optional in the URL; fall back to the content id.
        display_id = mobj.group('display_id') or video_id

        video = self._download_json(
            'https://api.byutv.org/api3/catalog/getvideosforcontent',
            display_id, query={
                'contentid': video_id,
                'channel': 'byutv',
                'x-byutv-context': 'web$US',
            }, headers={
                'x-byutv-context': 'web$US',
                'x-byutv-platformkey': 'xsaaw9c7y5',
            })

        # Ooyala-hosted content: hand the actual extraction over to the
        # Ooyala extractor while supplying the metadata known here.
        ep = video.get('ooyalaVOD')
        if ep:
            return {
                '_type': 'url_transparent',
                'ie_key': 'Ooyala',
                'url': 'ooyala:%s' % ep['providerId'],
                'id': video_id,
                'display_id': display_id,
                'title': ep.get('title'),
                'description': ep.get('description'),
                'thumbnail': ep.get('imageThumbnail'),
            }

        info = {}
        formats = []
        # Each top-level key of the response names one delivery variant;
        # only dict values with a valid 'videoUrl' are considered.
        for format_id, ep in video.items():
            if not isinstance(ep, dict):
                continue
            video_url = url_or_none(ep.get('videoUrl'))
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    video_url, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': video_url,
                    'format_id': format_id,
                })
            # merge_dicts is used for its return value (see the final
            # return below), so the result has to be re-bound; discarding
            # it would drop the metadata gathered from this entry.
            info = merge_dicts(info, {
                'title': ep.get('title'),
                'description': ep.get('description'),
                'thumbnail': ep.get('imageThumbnail'),
                'duration': parse_duration(ep.get('length')),
            })
        self._sort_formats(formats)

        # Metadata collected from the API goes first; 'title': display_id
        # is supplied as the value to use when the API gave no title.
        return merge_dicts(info, {
            'id': video_id,
            'display_id': display_id,
            'title': display_id,
            'formats': formats,
        })