[laola1tv] Add new extractor
[youtube-dl] / youtube_dl / extractor / laola1tv.py
from __future__ import unicode_literals

import random
import re

from .common import InfoExtractor
from ..utils import ExtractorError
7
8
class Laola1TvIE(InfoExtractor):
    """Information extractor for live streams on laola1.tv.

    The extraction walks through four requests: the video page, the player
    iframe embedded in it (which carries the ``flashvars`` credentials), the
    ``hd_video.php`` metadata XML, and finally the token XML that yields the
    playable stream URL.
    """

    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
        'info_dict': {
            'id': '250019',
            'ext': 'mp4',
            'title': 'Bitburger Open Grand Prix Gold - Court 1',
            'categories': ['Badminton'],
            'uploader': 'BWF - Badminton World Federation',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the laola1.tv page at *url*.

        Returns a dict with the keys ``id``, ``is_live``, ``title``, ``url``,
        ``uploader``, ``categories`` and ``ext``.

        Raises ExtractorError when the token document reports
        ``auth="blocked"``; the server-provided comment is included in the
        message.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        lang = mobj.group('lang')
        portal = mobj.group('portal')

        webpage = self._download_webpage(url, video_id)
        iframe_url = self._search_regex(
            r'<iframe[^>]*?class="main_tv_player"[^>]*?src="([^"]+)"',
            webpage, 'iframe URL')

        iframe = self._download_webpage(
            iframe_url, video_id, note='Downloading iframe')
        # The pattern has two groups, so findall() yields (name, value)
        # tuples that dict() can consume directly.
        flashvars = dict(re.findall(
            r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe))

        xml_url = (
            'http://www.laola1.tv/server/hd_video.php?'
            'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
                video_id, portal, lang))
        hd_doc = self._download_xml(xml_url, video_id)

        def optional_text(path):
            # Optional metadata: a missing element must not abort extraction.
            node = hd_doc.find(path)
            return node.text if node is not None else None

        # Title and stream URL are required; let a missing element fail loudly.
        title = hd_doc.find('.//video/title').text
        flash_url = hd_doc.find('.//video/url').text

        categories_str = optional_text('.//video/meta_sports')
        categories = categories_str.split(',') if categories_str else []
        # NOTE(review): 'meta_organistation' (sic) is kept exactly as the
        # original lookup spelled it; presumably it mirrors the feed's tag
        # name - confirm against a live response before "correcting" it.
        uploader = optional_text('.//video/meta_organistation')

        # A random 8-digit ident is sent along with the iframe credentials.
        ident = random.randint(10000000, 99999999)
        token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
            flash_url, ident, flashvars['timestamp'], flashvars['auth'])

        token_doc = self._download_xml(
            token_url, video_id, note='Downloading token')
        token_attrib = token_doc.find('.//token').attrib
        if token_attrib.get('auth') == 'blocked':
            # The format string needs a %s placeholder; without it the
            # %-operator raises TypeError instead of the intended error.
            raise ExtractorError(
                'Token error: %s' % token_attrib.get('comment'))

        video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
            token_attrib['url'], token_attrib['auth'])

        return {
            'id': video_id,
            'is_live': True,
            'title': title,
            'url': video_url,
            'uploader': uploader,
            'categories': categories,
            'ext': 'mp4',
        }
75