git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/musicplayon.py
[brightcove:new] extract subtitles and strip video title
[youtube-dl] / youtube_dl / extractor / musicplayon.py
1 # encoding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import int_or_none
8
9
class MusicPlayOnIE(InfoExtractor):
    """Extractor for video pages on musicplayon.com.

    Handles ``/play`` and ``/play-touch`` URLs on any subdomain where the
    numeric video id is passed either as ``v=<id>`` or as
    ``pl=100&play=<id>``.
    """

    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'

    _TEST = {
        'url': 'http://en.musicplayon.com/play?v=433377',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the video page for its metadata and the site's
        ``manifest.m3u8`` for the list of stream variants.  The returned
        dict contains ``id``, ``title``, ``description``, ``thumbnail``,
        ``uploader``, ``duration``, ``view_count`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page)
        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
        view_count = self._og_search_property('count', page, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)

        # The direct mobile stream is always offered, independently of
        # whatever the manifest lists.
        formats = [
            {
                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
                'ext': 'mp4',
            }
        ]

        manifest = self._download_webpage(
            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')

        # Every playlist tag starts with '#', so splitting on it yields one
        # chunk per tag; the chunk before the first '#' is dropped.
        for entry in manifest.split('#')[1:]:
            if not entry.startswith('EXT-X-STREAM-INF:'):
                continue

            # A variant entry is the tag line followed by the stream URI.
            # Dropping blank lines and stripping whitespace tolerates CRLF
            # line endings and a missing trailing newline.
            lines = [line.strip() for line in entry.splitlines() if line.strip()]
            if len(lines) < 2:
                continue
            meta, stream_url = lines[0], lines[1]

            # The first comma-separated piece still carries the tag name, so
            # it is skipped.  Split on the first '=' only and ignore pieces
            # without one, so unusual attribute values cannot abort parsing.
            params = dict(
                param.split('=', 1)
                for param in meta.split(',')[1:] if '=' in param)

            # RESOLUTION is '<width>x<height>': width comes first.
            width, _, height = params.get('RESOLUTION', '').partition('x')

            formats.append({
                'url': stream_url,
                'ext': 'mp4',
                # NOTE(review): HLS BANDWIDTH is in bits per second; confirm
                # this is the unit expected for 'tbr'.
                'tbr': int_or_none(params.get('BANDWIDTH') or None),
                'width': int_or_none(width or None),
                'height': int_or_none(height or None),
                'format_note': params.get('NAME', '').replace('"', '').strip() or None,
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(duration),
            'view_count': int_or_none(view_count),
            'formats': formats,
        }