[laola1tv] Add support for embed URLs and improve extraction (#11460)
[youtube-dl] / youtube_dl / extractor / laola1tv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     unified_strdate,
8     urlencode_postdata,
9     xpath_element,
10     xpath_text,
11     urljoin,
12 )
13
14
class Laola1TvEmbedIE(InfoExtractor):
    """Extractor for laola1.tv ``titanplayer.php`` embed player pages."""

    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'

    def _real_extract(self, url):
        """Build the info dict for an embed player URL.

        The player page's ``flashvars`` object supplies the query for the
        ``hd_video.php`` XML metadata document; that metadata is then used to
        request a stream-access URL, whose XML response carries the token
        needed to build the Akamai format URLs.

        Raises ExtractorError (expected) when the token document reports a
        status other than '0', and ExtractorError when it has no ``token``
        element at all.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        flash_vars = self._search_regex(
            r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')

        def get_flashvar(name):
            # Pull a single double-quoted value out of the flashvars object.
            return self._search_regex(
                r'%s\s*:\s*"([^"]+)"' % name, flash_vars, name)

        hd_doc = self._download_xml(
            'http://www.laola1.tv/server/hd_video.php', video_id, query={
                'play': get_flashvar('streamid'),
                'partner': get_flashvar('partnerid'),
                'portal': get_flashvar('portalid'),
                'lang': get_flashvar('sprache'),
                'v5ident': '',
            })

        def _v(tag, **kwargs):
            # Text of a child element of <video> in the metadata document.
            return xpath_text(hd_doc, './/video/' + tag, **kwargs)

        title = _v('title', fatal=True)

        # The comma separated subscription ids are posted as an index -> id
        # mapping. The element may be absent or empty (xpath_text then
        # returns None); post an empty body instead of crashing on
        # None.split().
        abos = _v('req_liga_abos')
        data_abo = urlencode_postdata(
            dict(enumerate(abos.split(','))) if abos else {})
        token_url = self._download_json(
            'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
            video_id, query={
                'videoId': _v('id'),
                'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
                'label': _v('label'),
                'area': _v('area'),
            }, data=data_abo)['data']['stream-access'][0]
        token_doc = self._download_xml(
            token_url, video_id, 'Downloading token',
            headers=self.geo_verification_headers())

        # fatal=True reports a missing <token> element as an ExtractorError
        # rather than failing with AttributeError on None.attrib.
        token_attrib = xpath_element(
            token_doc, './/token', 'token', fatal=True).attrib

        # .get() keeps a partially filled token element from masking the
        # error report with a KeyError.
        if token_attrib.get('status') != '0':
            raise ExtractorError(
                'Token error: %s' % token_attrib.get('comment'), expected=True)

        formats = self._extract_akamai_formats(
            '%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
            video_id)
        self._sort_formats(formats)

        categories_str = _v('meta_sports')
        categories = categories_str.split(',') if categories_str else []
        is_live = _v('islive') == 'true'

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'upload_date': unified_strdate(_v('time_date')),
            'uploader': _v('meta_organisation'),
            'categories': categories,
            'is_live': is_live,
            'formats': formats,
        }
75
76
class Laola1TvIE(InfoExtractor):
    """Extractor for laola1.tv video and livestream pages.

    The page itself only embeds the player in an iframe; the actual
    extraction is delegated to the ``Laola1TvEmbed`` extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
        'info_dict': {
            'id': '227883',
            'display_id': 'straubing-tigers-koelner-haie',
            'ext': 'flv',
            'title': 'Straubing Tigers - Kölner Haie',
            'upload_date': '20140912',
            'is_live': False,
            'categories': ['Eishockey'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
        'info_dict': {
            'id': '464602',
            'display_id': 'straubing-tigers-koelner-haie',
            'ext': 'flv',
            'title': 'Straubing Tigers - Kölner Haie',
            'upload_date': '20160129',
            'is_live': False,
            'categories': ['Eishockey'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
        'info_dict': {
            'id': '487850',
            'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
            'ext': 'flv',
            'title': 'Belogorie BELGOROD - TRENTINO Diatec',
            'upload_date': '20160322',
            'uploader': 'CEV - Europäischer Volleyball Verband',
            'is_live': True,
            'categories': ['Volleyball'],
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This live stream has already finished.',
    }]

    def _real_extract(self, url):
        """Locate the player iframe on the page and hand it to the embed extractor.

        Raises ExtractorError (expected) when the page states that the
        livestream has already finished.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        if 'Dieser Livestream ist bereits beendet.' in webpage:
            raise ExtractorError('This live stream has already finished.', expected=True)

        # The iframe src may be relative, so resolve it against the page URL.
        iframe_url = urljoin(url, self._search_regex(
            r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
            webpage, 'iframe url'))

        return {
            # 'url_transparent' rather than 'url': a plain 'url' result is
            # only re-dispatched to the target extractor and the display_id
            # given here would be discarded, while the tests above expect the
            # page slug as display_id.
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': iframe_url,
            'ie_key': 'Laola1TvEmbed',
        }