[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / tvplayer.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import (
6     compat_HTTPError,
7     compat_str,
8 )
9 from ..utils import (
10     extract_attributes,
11     try_get,
12     urlencode_postdata,
13     ExtractorError,
14 )
15
16
class TVPlayerIE(InfoExtractor):
    """Extractor for live channel streams served from tvplayer.com watch pages."""

    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://tvplayer.com/watch/bbcone',
        'info_dict': {
            'id': '89',
            'ext': 'mp4',
            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the channel addressed by ``url``.

        The watch page supplies the channel name, the numeric resource id
        and a page token; those are exchanged for a ``validate`` value via
        the context endpoint, which in turn is submitted to the stream API
        to obtain the m3u8 manifest URL.

        Raises ExtractorError (marked as expected) carrying the service's
        own error message when the stream API answers with an HTTP error.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The element whose class list contains "current-channel" carries
        # the channel metadata as data-* attributes.
        current_channel = extract_attributes(self._search_regex(
            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
            webpage, 'channel element'))
        title = current_channel['data-name']
        resource_id = current_channel['data-id']

        # Tempered pattern: every consumed character is checked against the
        # opening quote, so the match stops at the first closing quote.
        # A plain greedy `.+` would run on to the last matching quote on
        # the line and swallow any attributes that follow the token.
        token = self._search_regex(
            r'data-token=(["\'])(?P<token>(?:(?!\1).)+)\1', webpage,
            'token', group='token')

        context = self._download_json(
            'https://tvplayer.com/watch/context', display_id,
            'Downloading JSON context', query={
                'resource': resource_id,
                'gen': token,
            })

        validate = context['validate']
        # Fall back to 'firefox' when the context has no usable platform key.
        platform = try_get(
            context, lambda x: x['platform']['key'], compat_str) or 'firefox'

        try:
            response = self._download_json(
                'http://api.tvplayer.com/api/v2/stream/live',
                display_id, 'Downloading JSON stream', headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                }, data=urlencode_postdata({
                    'id': resource_id,
                    'service': 1,
                    'platform': platform,
                    'validate': validate,
                }))['tvplayer']['response']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                # The body of the HTTP error response is parsed as JSON with
                # the same envelope; surface its message instead of the bare
                # HTTP failure.
                response = self._parse_json(
                    e.cause.read().decode(), resource_id)['tvplayer']['response']
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
            raise

        formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
        self._sort_formats(formats)

        return {
            'id': resource_id,
            'display_id': display_id,
            'title': self._live_title(title),
            'formats': formats,
            'is_live': True,
        }