[funk] Fix extraction (closes #16918)
[youtube-dl] / youtube_dl / extractor / funk.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from .nexx import NexxIE
8 from ..compat import compat_str
9 from ..utils import (
10     int_or_none,
11     try_get,
12 )
13
14
class FunkBaseIE(InfoExtractor):
    """Shared request headers and result building for the funk.net extractors."""

    # Token sent in the 'authorization' request header; also kept as a
    # standalone class attribute.
    _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
    _HEADERS = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
        'authorization': _AUTH,
    }

    @staticmethod
    def _make_headers(referer):
        """Return a new dict of the base headers plus a ``Referer`` entry.

        The class-level ``_HEADERS`` dict is copied, never mutated.
        """
        request_headers = dict(FunkBaseIE._HEADERS)
        request_headers.update({'Referer': referer})
        return request_headers

    def _make_url_result(self, video):
        """Build a ``url_transparent`` result that hands *video* to NexxIE.

        *video* must contain ``sourceId`` (a missing key raises KeyError);
        the remaining metadata keys are optional and read with ``.get``.
        """
        source_id = video['sourceId']
        result = {
            '_type': 'url_transparent',
            'url': 'nexx:741:%s' % source_id,
            'ie_key': NexxIE.ie_key(),
            'id': source_id,
        }
        # Text fields are passed through untouched.
        for field in ('title', 'description'):
            result[field] = video.get(field)
        # Numeric fields are normalised through int_or_none.
        for field, source_key in (
                ('duration', 'duration'),
                ('season_number', 'seasonNr'),
                ('episode_number', 'episodeNr')):
            result[field] = int_or_none(video.get(source_key))
        return result
41
42
class FunkMixIE(FunkBaseIE):
    """Extractor for funk.net ``/mix/<id>/<alias>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
        'md5': '8edf617c2f2b7c9847dfda313f199009',
        'info_dict': {
            'id': '123748',
            'ext': 'mp4',
            'title': '"Die realste Kifferdoku aller Zeiten"',
            'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
            'timestamp': 1490274721,
            'upload_date': '20170323',
        },
    }]

    def _real_extract(self, url):
        """Find the video named by the URL alias inside the matching curated list.

        Raises StopIteration when no curated list matches the mix id or no
        video in that list matches the alias.
        """
        mix_id, alias = re.match(self._VALID_URL, url).group('id', 'alias')

        response = self._download_json(
            'https://www.funk.net/api/v3.1/curation/curatedLists/',
            mix_id, headers=self._make_headers(url), query={
                'size': 100,
            })
        curated_lists = response['_embedded']['curatedListList']

        # The URL's id segment is compared against both the list's entity id
        # and its alias.
        matching_lists = (
            curated for curated in curated_lists
            if mix_id in (curated.get('entityId'), curated.get('alias')))
        video_metas = next(matching_lists)['videoMetas']

        def delegate_alias(meta):
            return try_get(
                meta, lambda x: x['videoDataDelegate']['alias'], compat_str)

        matching_videos = (
            meta['videoDataDelegate']
            for meta in video_metas
            if delegate_alias(meta) == alias)

        return self._make_url_result(next(matching_videos))
80
81
class FunkChannelIE(FunkBaseIE):
    """Extractor for funk.net ``/channel/<id>/<alias>`` URLs.

    The video is looked up through three API endpoints in turn
    (byChannelAlias, byIdList, filter); the first one that yields a video
    wins.
    """

    _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
        'info_dict': {
            'id': '1155821',
            'ext': 'mp4',
            'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
            'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
            'timestamp': 1514507395,
            'upload_date': '20171229',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # only available via byIdList API
        'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
        'info_dict': {
            'id': '205067',
            'ext': 'mp4',
            'title': 'Martin Sonneborn erklärt die EU',
            'description': 'md5:050f74626e4ed87edf4626d2024210c0',
            'timestamp': 1494424042,
            'upload_date': '20170510',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the channel video named by the URL alias.

        Returns the ``url_transparent`` result built by
        ``_make_url_result``. Raises StopIteration when the last-resort
        ``filter`` endpoint does not list the alias either.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        alias = mobj.group('alias')

        headers = self._make_headers(url)

        video = None

        # Id-based channels are currently broken on their side: webplayer
        # tries to process them via byChannelAlias endpoint and fails
        # predictably.
        # channel_id is passed as the video id, as in the two requests
        # below, so the progress note is not used as the id.
        by_channel_alias = self._download_json(
            'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
            % channel_id,
            channel_id, 'Downloading byChannelAlias JSON', headers=headers,
            query={
                'size': 100,
            }, fatal=False)
        if by_channel_alias:
            video_list = try_get(
                by_channel_alias, lambda x: x['_embedded']['videoList'], list)
            if video_list:
                # Default to None so that a list lacking this alias falls
                # through to the next endpoint instead of raising
                # StopIteration.
                video = next(
                    (r for r in video_list if r.get('alias') == alias), None)

        if not video:
            by_id_list = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/byIdList',
                channel_id, 'Downloading byIdList JSON', headers=headers,
                query={
                    'ids': alias,
                }, fatal=False)
            if by_id_list:
                video = try_get(by_id_list, lambda x: x['result'][0], dict)

        if not video:
            # Last resort: this request is fatal, and a missing alias raises
            # StopIteration because there is nothing left to fall back to.
            results = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/filter',
                channel_id, 'Downloading filter JSON', headers=headers, query={
                    'channelId': channel_id,
                    'size': 100,
                })['result']
            video = next(r for r in results if r.get('alias') == alias)

        return self._make_url_result(video)