[zingmp3] Add fatal flag
[youtube-dl] / youtube_dl / extractor / zingmp3.py
1 # coding=utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import ExtractorError
8
9
class ZingMp3BaseInfoExtractor(InfoExtractor):
    """Common player-XML handling for the mp3.zing.vn extractors.

    Subclasses locate the player XML URL on a web page and hand it to
    ``_extract_player_xml``, which turns it into either a single info dict
    or a playlist result.
    """

    def _extract_item(self, item, fatal=True):
        """Build an info dict from one ``<item>`` element of the player XML.

        :param item: XML element holding ``errormessage``, ``title``,
            ``source`` and ``backimage`` children and a ``type`` attribute.
        :param fatal: when true (the default), a non-empty error message
            raises; when false, ``None`` is returned instead.
        :returns: dict with ``title``, ``url``, ``ext`` and ``thumbnail``
            keys, or ``None`` if the item reports an error and ``fatal`` is
            false.
        :raises ExtractorError: if the item carries a non-empty error
            message and ``fatal`` is true.
        """
        # findtext() yields None when the element is absent, so an item
        # without an <errormessage> child is simply treated as error-free.
        error_message = item.findtext('./errormessage')
        if error_message:
            if not fatal:
                return None
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_message),
                expected=True)

        title = item.find('./title').text.strip()
        source = item.find('./source').text
        # The item's "type" attribute is used as the file extension.
        extension = item.attrib['type']
        thumbnail = item.find('./backimage').text

        return {
            'title': title,
            'url': source,
            'ext': extension,
            'thumbnail': thumbnail,
        }

    def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
        """Download the player XML and convert its ``<item>`` elements.

        :param player_xml_url: URL of the player XML document.
        :param id: identifier used for the download and as the result id.
        :param playlist_title: title stored on the playlist result; not used
            when the XML holds exactly one item.
        :returns: a single info dict when the XML contains exactly one item,
            otherwise a playlist dict (which is empty if there are no
            items).
        :raises ExtractorError: if the only item of a single-song XML
            reports an error.
        """
        player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
        items = player_xml.findall('./item')

        if len(items) == 1:
            # One single song: there is nothing to fall back to, so an
            # error reported by the item is fatal.
            data = self._extract_item(items[0])
            data['id'] = id
            return data

        # Playlist of songs: an item that reports an error is skipped so
        # that one bad track does not abort the whole playlist.
        entries = []
        for i, item in enumerate(items, 1):
            entry = self._extract_item(item, fatal=False)
            if not entry:
                continue
            # The index comes from the position in the XML, so ids of the
            # remaining entries do not shift when an item is skipped.
            entry['id'] = '%s-%d' % (id, i)
            entries.append(entry)

        return {
            '_type': 'playlist',
            'id': id,
            'title': playlist_title,
            'entries': entries,
        }
58
59
class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
    """Extractor for single-song pages under ``mp3.zing.vn/bai-hat/``."""

    _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
        'md5': 'ead7ae13693b3205cbc89536a077daed',
        'info_dict': {
            'id': 'ZWZB9WAB',
            'title': 'Xa Mãi Xa',
            'ext': 'mp3',
            # Raw string: "\." is a regex escape here, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]
    IE_NAME = 'zingmp3:song'
    IE_DESC = 'mp3.zing.vn songs'

    def _real_extract(self, url):
        """Return the info dict (or playlist) for the song page at ``url``.

        The slug and song id are taken from ``url``, the song page is
        downloaded, and the player XML URL found in it is passed to
        ``_extract_player_xml`` together with the song id.
        """
        matched = re.match(self._VALID_URL, url)
        slug = matched.group('slug')
        song_id = matched.group('song_id')

        # The page URL is rebuilt from its parts, always over plain http.
        webpage = self._download_webpage(
            'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)

        # The XML URL appears in the page as an HTML-escaped "xmlURL"
        # parameter; everything up to the next "&" is captured.
        player_xml_url = self._search_regex(
            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')

        return self._extract_player_xml(player_xml_url, song_id)
87
88
class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
    """Extractor for ``mp3.zing.vn`` album and playlist pages."""

    _VALID_URL = r'https?://mp3\.zing\.vn/(?:album|playlist)/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ZWZBWDAF',
            'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
        'only_matching': True,
    }]
    IE_NAME = 'zingmp3:album'
    IE_DESC = 'mp3.zing.vn albums'

    def _real_extract(self, url):
        """Return the playlist result for the album/playlist page at ``url``.

        The page is downloaded, the player XML URL found in it is passed to
        ``_extract_player_xml`` with the album id, and the page's Open Graph
        title is used as the playlist title.
        """
        album_id = re.match(self._VALID_URL, url).group('album_id')

        # Fetch the URL as given: _VALID_URL accepts both /album/ and
        # /playlist/ paths, so rebuilding it with a hard-coded /album/
        # segment would request a different page for playlist links.
        webpage = self._download_webpage(url, album_id)

        # The XML URL appears in the page as an HTML-escaped "xmlURL"
        # parameter; everything up to the next "&" is captured.
        player_xml_url = self._search_regex(
            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')

        return self._extract_player_xml(
            player_xml_url, album_id,
            playlist_title=self._og_search_title(webpage))