X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmixcloud.py;h=0efbe660a5d2a88b41e198078da84baf8bfc7a9e;hb=9cafc3fd8b54b9b91a145cddf9e4db0bd59e1b5f;hp=483f6925fda989fc5111694c8c82f1807a1f3d97;hpb=b6c0d4f4315a282257adc6ab980b4cf4bfb3d418;p=youtube-dl diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 483f6925f..0efbe660a 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -16,13 +16,12 @@ from ..utils import ( clean_html, ExtractorError, OnDemandPagedList, - parse_count, str_to_int, ) class MixcloudIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' + _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' IE_NAME = 'mixcloud' _TESTS = [{ @@ -34,9 +33,8 @@ class MixcloudIE(InfoExtractor): 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'uploader': 'Daniel Holbach', 'uploader_id': 'dholbach', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, - 'like_count': int, }, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', @@ -49,8 +47,10 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', 'view_count': int, - 'like_count': int, }, + }, { + 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', + 'only_matching': True, }] # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js @@ -86,27 +86,19 @@ class MixcloudIE(InfoExtractor): song_url = play_info['stream_url'] - PREFIX = ( - r'm-play-on-spacebar[^>]+' - r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') - title = self._html_search_regex( - PREFIX + r'm-title="([^"]+)"', webpage, 'title') + title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') thumbnail = self._proto_relative_url(self._html_search_regex( - PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', - fatal=False)) + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False)) uploader = self._html_search_regex( - PREFIX + r'm-owner-name="([^"]+)"', - webpage, 'uploader', fatal=False) + r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) uploader_id = self._search_regex( r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) - like_count = parse_count(self._search_regex( - r'\bbutton-favorite[^>]+>.*?]+class=["\']toggle-number[^>]+>\s*([^<]+)', - webpage, 'like count', fatal=False)) view_count = str_to_int(self._search_regex( [r'([0-9,.]+)'], - webpage, 'play count', fatal=False)) + r'/listeners/?">([0-9,.]+)', + r'(?:m|data)-tooltip=["\']([\d,.]+) plays'], + webpage, 'play count', default=None)) return { 'id': track_id, @@ -117,7 +109,6 @@ class MixcloudIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'view_count': view_count, - 'like_count': like_count, } @@ -147,12 +138,12 @@ class MixcloudPlaylistBaseIE(InfoExtractor): def _get_user_description(self, page_content): return self._html_search_regex( - r']+class="description-text"[^>]*>(.+?)', + r']+class="profile-bio"[^>]*>(.+?)', page_content, 'user description', fatal=False) class MixcloudUserIE(MixcloudPlaylistBaseIE): - _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P[^/]+)/(?Puploads|favorites|listens)?/?$' + _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P[^/]+)/(?Puploads|favorites|listens)?/?$' IE_NAME = 'mixcloud:user' _TESTS = [{ @@ -160,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'playlist_mincount': 11, }, { @@ -168,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_uploads', 'title': 'Daniel Holbach (uploads)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'playlist_mincount': 11, }, { @@ -176,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_favorites', 'title': 'Daniel Holbach (favorites)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'params': { 'playlist_items': '1-100', @@ -187,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): 'info_dict': { 'id': 'dholbach_listens', 'title': 'Daniel Holbach (listens)', - 'description': 'md5:327af72d1efeb404a8216c27240d1370', + 'description': 'md5:def36060ac8747b3aabca54924897e47', }, 'params': { 'playlist_items': '1-100', @@ -225,7 +216,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): - _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P[^/]+)/playlists/(?P[^/]+)/?$' + _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P[^/]+)/playlists/(?P[^/]+)/?$' IE_NAME = 'mixcloud:playlist' _TESTS = [{ @@ -238,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): 'playlist_mincount': 16, }, { 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/', - 'info_dict': { - 'id': 'maxvibes_jazzcat-on-ness-radio', - 'title': 'Jazzcat on Ness Radio', - 'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263', - }, - 'playlist_mincount': 23 + 'only_matching': True, }] def _real_extract(self, url): @@ -252,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): playlist_id = mobj.group('playlist') video_id = '%s_%s' % (user_id, playlist_id) - profile = self._download_webpage( + webpage = self._download_webpage( url, user_id, note='Downloading playlist page', errnote='Unable to download playlist page') - description = self._get_user_description(profile) - playlist_title = self._html_search_regex( - r']+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)', - profile, 'playlist title') + title = self._html_search_regex( + r']+class="parent active"[^>]*>\d+]*>([^<]+)', + webpage, 'playlist title', + default=None) or self._og_search_title(webpage, fatal=False) + description = self._get_user_description(webpage) entries = OnDemandPagedList( functools.partial( @@ -268,11 +255,11 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'), self._PAGE_SIZE) - return self.playlist_result(entries, video_id, playlist_title, description) + return self.playlist_result(entries, video_id, title, description) class MixcloudStreamIE(MixcloudPlaylistBaseIE): - _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P[^/]+)/stream/?$' + _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P[^/]+)/stream/?$' IE_NAME = 'mixcloud:stream' _TEST = {