[tvplayer] Add new extractor
[youtube-dl] / youtube_dl / extractor / tvplayer.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_HTTPError
6 from ..utils import (
7     extract_attributes,
8     urlencode_postdata,
9     ExtractorError,
10 )
11
12
class TVPlayerIE(InfoExtractor):
    """Extractor for live channels served from tvplayer.com/watch/<channel>.

    The watch page embeds the parameters (resource id, platform, token and
    validate) that are then posted to the TVPlayer stream API in order to
    obtain the URL of an m3u8 manifest for the live stream.
    """

    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://tvplayer.com/watch/bbcone',
        'info_dict': {
            'id': '89',
            'ext': 'mp4',
            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Extract the live stream formats for the channel at *url*.

        Returns an info dict with the numeric resource id as ``id``, the
        channel slug taken from the URL as ``display_id``, the live title,
        the m3u8 formats and ``is_live`` set to True.

        Raises ExtractorError (flagged as expected) carrying the API's own
        error message when the stream request fails with an HTTP error; any
        other ExtractorError is propagated unchanged.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The channel name is stored as an attribute of the element whose
        # class list contains "current-channel".
        current_channel = extract_attributes(self._search_regex(
            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
            webpage, 'channel element'))
        title = current_channel['data-name']

        # Parameters for the stream API are scraped from assignments in the
        # page. Resource id and platform are required; token and validate
        # are searched with default='null', so the literal string 'null' is
        # what gets posted when the page does not define them.
        resource_id = self._search_regex(
            r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
        platform = self._search_regex(
            r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
        token = self._search_regex(
            r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
        validate = self._search_regex(
            r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        }
        request_form = {
            'service': 1,
            'platform': platform,
            'id': resource_id,
            'token': token,
            'validate': validate,
        }

        try:
            response = self._download_json(
                'http://api.tvplayer.com/api/v2/stream/live',
                resource_id, headers=request_headers,
                data=urlencode_postdata(request_form))['tvplayer']['response']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                # The body of the HTTP error is itself a JSON document that
                # holds a human readable message. Decode it explicitly as
                # UTF-8: a bare .decode() falls back to ASCII on Python 2 and
                # would fail on any non-ASCII character in that message.
                response = self._parse_json(
                    e.cause.read().decode('utf-8'),
                    resource_id)['tvplayer']['response']
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
            raise

        formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
        self._sort_formats(formats)

        return {
            'id': resource_id,
            'display_id': display_id,
            # NOTE(review): _live_title presumably appends the current date
            # and time, which is what the title pattern in _TEST expects;
            # confirm against InfoExtractor.
            'title': self._live_title(title),
            'formats': formats,
            'is_live': True,
        }