X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbeatportpro.py;h=3c7775d3e2762bef7f9424ffa769346dae2d577f;hb=2bc43303031215436b201e656094b60ab3ec7e9e;hp=bc201572ec7d677504997159bb7b90f8cbbf906d;hpb=1b53778175e43e2bf2cb71885a760d96727ee837;p=youtube-dl diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py index bc201572e..3c7775d3e 100644 --- a/youtube_dl/extractor/beatportpro.py +++ b/youtube_dl/extractor/beatportpro.py @@ -1,20 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor - import re -import json + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none class BeatportProIE(InfoExtractor): - _VALID_URL = r'https?://pro\.beatport\.com/track/.*/(?P<id>[0-9]+)' + _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371', 'md5': 'b3c34d8639a2f6a7f734382358478887', 'info_dict': { - 'id': 5379371, - 'display-id': 'synesthesia-original-mix', + 'id': '5379371', + 'display_id': 'synesthesia-original-mix', 'ext': 'mp4', 'title': 'Froxic - Synesthesia (Original Mix)', }, @@ -22,8 +23,8 @@ class BeatportProIE(InfoExtractor): 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896', 'md5': 'e44c3025dfa38c6577fbaeb43da43514', 'info_dict': { - 'id': 3756896, - 'display-id': 'love-and-war-original-mix', + 'id': '3756896', + 'display_id': 'love-and-war-original-mix', 'ext': 'mp3', 'title': 'Wolfgang Gartner - Love & War (Original Mix)', }, @@ -31,34 +32,35 @@ class BeatportProIE(InfoExtractor): 'url': 'https://pro.beatport.com/track/birds-original-mix/4991738', 'md5': 'a1fd8e8046de3950fd039304c186c05f', 'info_dict': { - 'id': 4991738, - 'display-id': 'birds-original-mix', + 'id': '4991738', + 'display_id': 'birds-original-mix', 'ext': 'mp4', 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds 
(Original Mix)", } }] def _real_extract(self, url): - track_id = self._match_id(url) - webpage = self._download_webpage(url, track_id) + mobj = re.match(self._VALID_URL, url) + track_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) - # Extract "Playables" JSON information from the page - playables = self._search_regex(r'window\.Playables = ({.*?});', webpage, - 'playables info', flags=re.DOTALL) - playables = json.loads(playables) + playables = self._parse_json( + self._search_regex( + r'window\.Playables\s*=\s*({.+?});', webpage, + 'playables info', flags=re.DOTALL), + track_id) - # Find first track with matching ID (always the first one listed?) track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) - # Construct title from artist(s), track name, and mix name title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] if track['mix']: title += ' (' + track['mix'] + ')' - # Get format information formats = [] for ext, info in track['preview'].items(): - if info['url'] is None: + if not info['url']: continue fmt = { 'url': info['url'], @@ -76,26 +78,26 @@ class BeatportProIE(InfoExtractor): fmt['acodec'] = 'aac' fmt['abr'] = 96 fmt['asr'] = 44100 - formats += [fmt] + formats.append(fmt) self._sort_formats(formats) - # Get album art as thumbnails - imgs = [] + images = [] for name, info in track['images'].items(): - if name == 'dynamic' or info['url'] is None: + image_url = info.get('url') + if name == 'dynamic' or not image_url: continue - img = { + image = { 'id': name, - 'url': info['url'], - 'height': info['height'], - 'width': info['width'], + 'url': image_url, + 'height': int_or_none(info.get('height')), + 'width': int_or_none(info.get('width')), } - imgs += [img] + images.append(image) return { - 'id': track['id'], - 'display-id': track['slug'], + 'id': compat_str(track.get('id')) or track_id, + 'display_id': track.get('slug') or display_id, 'title': 
title, 'formats': formats, - 'thumbnails': imgs, + 'thumbnails': images, }