_ Git - youtube-dl/blob - youtube_dl/extractor/funk.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from .nexx import NexxIE
   8 from ..compat import compat_str
   9 from ..utils import (
  10     int_or_none,
  11     try_get,
  12 )
  13
  14
  15 class FunkBaseIE(InfoExtractor):
  16     _HEADERS = {
  17         'Accept': '*/*',
  18         'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
  19         'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
  20     }
  21     _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
  22
  23     @staticmethod
  24     def _make_headers(referer):
  25         headers = FunkBaseIE._HEADERS.copy()
  26         headers['Referer'] = referer
  27         return headers
  28
  29     def _make_url_result(self, video):
  30         return {
  31             '_type': 'url_transparent',
  32             'url': 'nexx:741:%s' % video['sourceId'],
  33             'ie_key': NexxIE.ie_key(),
  34             'id': video['sourceId'],
  35             'title': video.get('title'),
  36             'description': video.get('description'),
  37             'duration': int_or_none(video.get('duration')),
  38             'season_number': int_or_none(video.get('seasonNr')),
  39             'episode_number': int_or_none(video.get('episodeNr')),
  40         }
  41
  42
  43 class FunkMixIE(FunkBaseIE):
  44     _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
  45     _TESTS = [{
  46         'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
  47         'md5': '8edf617c2f2b7c9847dfda313f199009',
  48         'info_dict': {
  49             'id': '123748',
  50             'ext': 'mp4',
  51             'title': '"Die realste Kifferdoku aller Zeiten"',
  52             'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
  53             'timestamp': 1490274721,
  54             'upload_date': '20170323',
  55         },
  56     }]
  57
  58     def _real_extract(self, url):
  59         mobj = re.match(self._VALID_URL, url)
  60         mix_id = mobj.group('id')
  61         alias = mobj.group('alias')
  62
  63         lists = self._download_json(
  64             'https://www.funk.net/api/v3.1/curation/curatedLists/',
  65             mix_id, headers=self._make_headers(url), query={
  66                 'size': 100,
  67             })['_embedded']['curatedListList']
  68
  69         metas = next(
  70             l for l in lists
  71             if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
  72         video = next(
  73             meta['videoDataDelegate']
  74             for meta in metas
  75             if try_get(
  76                 meta, lambda x: x['videoDataDelegate']['alias'],
  77                 compat_str) == alias)
  78
  79         return self._make_url_result(video)
  80
  81
  82 class FunkChannelIE(FunkBaseIE):
  83     _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
  84     _TESTS = [{
  85         'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
  86         'info_dict': {
  87             'id': '1155821',
  88             'ext': 'mp4',
  89             'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
  90             'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
  91             'timestamp': 1514507395,
  92             'upload_date': '20171229',
  93         },
  94         'params': {
  95             'skip_download': True,
  96         },
  97     }, {
  98         # only available via byIdList API
  99         'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
 100         'info_dict': {
 101             'id': '205067',
 102             'ext': 'mp4',
 103             'title': 'Martin Sonneborn erklärt die EU',
 104             'description': 'md5:050f74626e4ed87edf4626d2024210c0',
 105             'timestamp': 1494424042,
 106             'upload_date': '20170510',
 107         },
 108         'params': {
 109             'skip_download': True,
 110         },
 111     }, {
 112         'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
 113         'only_matching': True,
 114     }]
 115
 116     def _real_extract(self, url):
 117         mobj = re.match(self._VALID_URL, url)
 118         channel_id = mobj.group('id')
 119         alias = mobj.group('alias')
 120
 121         headers = self._make_headers(url)
 122
 123         video = None
 124
 125         # Id-based channels are currently broken on their side: webplayer
 126         # tries to process them via byChannelAlias endpoint and fails
 127         # predictably.
 128         by_channel_alias = self._download_json(
 129             'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
 130             % channel_id,
 131             'Downloading byChannelAlias JSON', headers=headers, query={
 132                 'size': 100,
 133             }, fatal=False)
 134         if by_channel_alias:
 135             video_list = try_get(
 136                 by_channel_alias, lambda x: x['_embedded']['videoList'], list)
 137             if video_list:
 138                 video = next(r for r in video_list if r.get('alias') == alias)
 139
 140         if not video:
 141             by_id_list = self._download_json(
 142                 'https://www.funk.net/api/v3.0/content/videos/byIdList',
 143                 channel_id, 'Downloading byIdList JSON', headers=headers,
 144                 query={
 145                     'ids': alias,
 146                 }, fatal=False)
 147             if by_id_list:
 148                 video = try_get(by_id_list, lambda x: x['result'][0], dict)
 149
 150         if not video:
 151             results = self._download_json(
 152                 'https://www.funk.net/api/v3.0/content/videos/filter',
 153                 channel_id, 'Downloading filter JSON', headers=headers, query={
 154                     'channelId': channel_id,
 155                     'size': 100,
 156                 })['result']
 157             video = next(r for r in results if r.get('alias') == alias)
 158
 159         return self._make_url_result(video)