_ Git - youtube-dl/blob - youtube_dl/extractor/zingmp3.py

   1 # coding=utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7
   8
   9 class ZingMp3BaseInfoExtractor(InfoExtractor):
  10
  11     @staticmethod
  12     def _extract_item(item):
  13         title = item.find('./title').text.strip()
  14         source = item.find('./source').text
  15         extension = item.attrib['type']
  16         thumbnail = item.find('./backimage').text
  17
  18         return {
  19             'title': title,
  20             'url': source,
  21             'ext': extension,
  22             'thumbnail': thumbnail,
  23         }
  24
  25     def _extract_player_xml(self, player_xml_url, id, playlist_title=None):
  26         player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML')
  27
  28         self.report_extraction(id)
  29         items = player_xml.findall('./item')
  30
  31         if len(items) == 1:
  32             # one single song
  33             data = self._extract_item(items[0])
  34             data['id'] = id
  35
  36             return data
  37         else:
  38             # playlist of songs
  39             entries = []
  40
  41             for i, item in enumerate(items, 1):
  42                 entry = self._extract_item(item)
  43                 entry['id'] = '%s-%d' % (id, i)
  44                 entries.append(entry)
  45
  46             return {
  47                 '_type': 'playlist',
  48                 'id': id,
  49                 'title': playlist_title,
  50                 'entries': entries,
  51             }
  52
  53
  54 class ZingMp3SongIE(ZingMp3BaseInfoExtractor):
  55     _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html'
  56     _TESTS = [{
  57         'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
  58         'info_dict': {
  59             'id': 'ZWZB9WAB',
  60             'title': u'Xa Mãi Xa',
  61             'ext': 'mp3',
  62         },
  63         'md5': 'ead7ae13693b3205cbc89536a077daed',
  64     }]
  65     IE_NAME = 'zingmp3:song'
  66     IE_DESC = 'mp3.zing.vn songs'
  67
  68     def _real_extract(self, url):
  69         matched = re.match(self._VALID_URL, url)
  70         slug = matched.group('slug')
  71         song_id = matched.group('song_id')
  72
  73         webpage = self._download_webpage('http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id)
  74
  75         player_xml_url = self._search_regex(r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
  76
  77         return self._extract_player_xml(player_xml_url, song_id)
  78
  79
  80 class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor):
  81     _VALID_URL = r'https?://mp3\.zing\.vn/album/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html'
  82     _TESTS = [{
  83         'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
  84         'info_dict': {
  85             '_type': 'playlist',
  86             'id': 'ZWZBWDAF',
  87             'title': u'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless',
  88         },
  89         'playlist_count': 10,
  90     }]
  91     IE_NAME = 'zingmp3:album'
  92     IE_DESC = 'mp3.zing.vn albums'
  93
  94     def _real_extract(self, url):
  95         matched = re.match(self._VALID_URL, url)
  96         slug = matched.group('slug')
  97         album_id = matched.group('album_id')
  98
  99         webpage = self._download_webpage('http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id)
 100
 101         player_xml_url = self._search_regex(r'&amp;xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url')
 102
 103         return self._extract_player_xml(player_xml_url, album_id, playlist_title=self._og_search_title(webpage))