[zingmp3] Capture error message
[youtube-dl] / youtube_dl / extractor / zingmp3.py
1 # coding=utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import ExtractorError
8
9
class ZingMp3BaseInfoExtractor(InfoExtractor):
    """Shared player-XML handling for the mp3.zing.vn extractors.

    Subclasses locate the player XML URL on a web page and hand it to
    ``_extract_player_xml``, which turns the ``<item>`` elements of that
    document into info dicts.
    """

    def _extract_item(self, item):
        """Build an info dict from a single ``<item>`` element.

        Returns a dict with the keys ``title``, ``url``, ``ext`` and
        ``thumbnail`` (``thumbnail`` is None when the item has no
        ``<backimage>`` text).

        Raises ExtractorError (marked as expected) when the item carries a
        non-empty ``<errormessage>``.  A missing ``<title>``/``<source>``
        child or ``type`` attribute still raises, as those values are
        required to build the result.
        """
        # findtext() yields None when the element is absent, so items
        # without an <errormessage> child no longer crash the error check.
        error_message = item.findtext('./errormessage')
        if error_message:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_message),
                expected=True)

        title = item.find('./title').text.strip()
        source = item.find('./source').text
        extension = item.attrib['type']
        # The thumbnail is optional; normalise "missing" and "empty" to None.
        thumbnail = item.findtext('./backimage') or None

        return {
            'title': title,
            'url': source,
            'ext': extension,
            'thumbnail': thumbnail,
        }

    def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
        """Download the player XML and convert its items.

        player_xml_url -- URL of the player XML document
        id             -- id used for the download and for the result
        playlist_title -- title stored on the result when it is a playlist

        Returns a single info dict when the document holds exactly one
        ``<item>``; otherwise a playlist dict whose entries get ids of the
        form ``<id>-<position>`` (positions start at 1).
        """
        player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
        items = player_xml.findall('./item')

        if len(items) == 1:
            # one single song
            data = self._extract_item(items[0])
            data['id'] = id
            return data

        # playlist of songs (this also covers a document with no items,
        # which produces an empty playlist)
        entries = []
        for i, item in enumerate(items, 1):
            entry = self._extract_item(item)
            entry['id'] = '%s-%d' % (id, i)
            entries.append(entry)

        return {
            '_type': 'playlist',
            'id': id,
            'title': playlist_title,
            'entries': entries,
        }
56
57
class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
    """Extractor for song pages under ``mp3.zing.vn/bai-hat/``."""

    _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
        'md5': 'ead7ae13693b3205cbc89536a077daed',
        'info_dict': {
            'id': 'ZWZB9WAB',
            'title': 'Xa Mãi Xa',
            'ext': 'mp3',
            # Raw string: ``\.`` is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]
    IE_NAME = 'zingmp3:song'
    IE_DESC = 'mp3.zing.vn songs'

    def _real_extract(self, url):
        """Extract the song referenced by ``url``.

        Downloads the song page, finds the player XML URL embedded in it
        and delegates to ``_extract_player_xml`` with the song id.
        """
        matched = re.match(self._VALID_URL, url)
        slug = matched.group('slug')
        song_id = matched.group('song_id')

        # The page is always requested over plain http, rebuilt from the
        # slug and id captured from the input URL.
        webpage = self._download_webpage(
            'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)

        # The XML URL sits in an HTML-escaped query string on the page.
        player_xml_url = self._search_regex(
            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')

        return self._extract_player_xml(player_xml_url, song_id)
85
86
class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
    """Extractor for album pages under ``mp3.zing.vn/album/``."""

    IE_NAME = 'zingmp3:album'
    IE_DESC = 'mp3.zing.vn albums'

    _VALID_URL = r'https?://mp3\.zing\.vn/album/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ZWZBWDAF',
            'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
        },
        'playlist_count': 10,
    }]

    def _real_extract(self, url):
        """Extract the album referenced by ``url``.

        Fetches the album page, pulls the player XML URL out of it and
        passes that URL, the album id and the page's Open Graph title to
        ``_extract_player_xml``.
        """
        url_match = re.match(self._VALID_URL, url)
        slug, album_id = url_match.group('slug', 'album_id')

        # Rebuild the canonical (plain http) page address from the URL parts.
        page_url = 'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id)
        webpage = self._download_webpage(page_url, album_id)

        xml_url = self._search_regex(
            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
        album_title = self._og_search_title(webpage)

        return self._extract_player_xml(
            xml_url, album_id, playlist_title=album_title)