Merge branch 'pr-twitter' of https://github.com/atomicdryad/youtube-dl into atomicdry...
[youtube-dl] / youtube_dl / extractor / zingmp3.py
1 # coding=utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import ExtractorError
8
9
class ZingMp3BaseInfoExtractor(InfoExtractor):
    """Shared helpers for extractors that read the mp3.zing.vn player XML."""

    @staticmethod
    def _child_text(item, path):
        """Return the text of the child element at ``path``, or None.

        ``find`` returns None when no child matches, so the lookup is guarded
        instead of dereferencing ``.text`` unconditionally.
        """
        node = item.find(path)
        return node.text if node is not None else None

    def _extract_item(self, item, fatal=True):
        """Build an info dict from one ``<item>`` element of the player XML.

        Reads the ``errormessage``, ``title``, ``source`` and ``backimage``
        children and the ``type`` attribute of ``item``.

        Args:
            item: XML element describing a single track.
            fatal: when true, problems raise ExtractorError; when false the
                item is skipped by returning None.

        Returns:
            A dict with the keys ``title``, ``url``, ``ext`` and
            ``thumbnail``, or None when the item is unusable and ``fatal``
            is false.

        Raises:
            ExtractorError: if ``fatal`` is true and the item carries an
                error message or lacks a title or source.
        """
        error_message = self._child_text(item, './errormessage')
        if error_message:
            if not fatal:
                return None
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error_message),
                expected=True)

        title = self._child_text(item, './title')
        source = self._child_text(item, './source')
        if title is None or not source:
            # Without a title or a media URL there is nothing to return.
            if not fatal:
                return None
            raise ExtractorError(
                '%s: item is missing its title or source' % self.IE_NAME)

        return {
            'title': title.strip(),
            'url': source,
            'ext': item.attrib['type'],
            # The thumbnail is optional: a missing <backimage> yields None.
            'thumbnail': self._child_text(item, './backimage'),
        }

    def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
        """Download the player XML and turn it into a song or a playlist.

        Args:
            player_xml_url: URL passed to ``self._download_xml``.
            id: identifier used for the download and for the result; entries
                of a playlist get ``<id>-<position>``.
            playlist_title: title stored on the playlist result.

        Returns:
            The info dict of the only item when the XML holds exactly one
            ``<item>``; otherwise a ``_type: playlist`` dict whose entries
            are the items that could be extracted.
        """
        player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
        items = player_xml.findall('./item')

        if len(items) == 1:
            # A single song: any problem with it is fatal.
            data = self._extract_item(items[0])
            data['id'] = id
            return data

        # A playlist of songs: broken items are skipped, not fatal.
        entries = []
        for i, item in enumerate(items, 1):
            entry = self._extract_item(item, fatal=False)
            if not entry:
                continue
            # The position counts every item in the XML, skipped ones too.
            entry['id'] = '%s-%d' % (id, i)
            entries.append(entry)

        return {
            '_type': 'playlist',
            'id': id,
            'title': playlist_title,
            'entries': entries,
        }
60
61
class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
    """Extractor for single-song pages (``/bai-hat/``) on mp3.zing.vn."""

    _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
        'md5': 'ead7ae13693b3205cbc89536a077daed',
        'info_dict': {
            'id': 'ZWZB9WAB',
            'title': 'Xa Mãi Xa',
            'ext': 'mp3',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]
    IE_NAME = 'zingmp3:song'
    IE_DESC = 'mp3.zing.vn songs'

    def _real_extract(self, url):
        """Extract the song behind ``url``.

        Downloads the song page, finds the player XML address in its
        ``xmlURL`` parameter and returns whatever ``_extract_player_xml``
        builds for it, using the song id from the URL as the identifier.
        """
        matched = re.match(self._VALID_URL, url)
        slug = matched.group('slug')
        song_id = matched.group('song_id')

        # The page is requested from a URL rebuilt from the matched parts.
        webpage = self._download_webpage(
            'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)

        player_xml_url = self._search_regex(
            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')

        return self._extract_player_xml(player_xml_url, song_id)
89
90
class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
    """Extractor for album and playlist pages on mp3.zing.vn."""

    # The path segment is captured so the page can be requested from the
    # same section (album or playlist) that the input URL points to.
    _VALID_URL = r'https?://mp3\.zing\.vn/(?P<kind>album|playlist)/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ZWZBWDAF',
            'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
        'only_matching': True,
    }]
    IE_NAME = 'zingmp3:album'
    IE_DESC = 'mp3.zing.vn albums'

    def _real_extract(self, url):
        """Extract the album or playlist behind ``url``.

        Downloads the page, finds the player XML address in its ``xmlURL``
        parameter and returns whatever ``_extract_player_xml`` builds for it,
        with the page's Open Graph title as the playlist title.
        """
        matched = re.match(self._VALID_URL, url)
        kind = matched.group('kind')
        slug = matched.group('slug')
        album_id = matched.group('album_id')

        # Keep the matched section: a playlist URL must not be rewritten to
        # an album URL before it is downloaded.
        webpage = self._download_webpage(
            'http://mp3.zing.vn/%s/%s/%s.html' % (kind, slug, album_id), album_id)
        player_xml_url = self._search_regex(
            r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')

        return self._extract_player_xml(
            player_xml_url, album_id,
            playlist_title=self._og_search_title(webpage))