X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fwrzuta.py;h=0f53f1bcb85f409a71d77712b006a57146a9d513;hb=dcdb292fddc82ae11f4c0b647815a45c88a6b6d5;hp=830649214fbd893dc54419a84c95849ce4011e6b;hpb=fea55ef4a95d226668bd63742c4731832de93a79;p=youtube-dl diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index 830649214..0f53f1bcb 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -1,12 +1,14 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, qualities, + remove_start, ) @@ -26,16 +28,17 @@ class WrzutaIE(InfoExtractor): 'uploader_id': 'laboratoriumdextera', 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', }, + 'skip': 'Redirected to wrzuta.pl', }, { - 'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty', - 'md5': 'bc78077859bea7bcfe4295d7d7fc9025', + 'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus', + 'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d', 'info_dict': { - 'id': '063jOPX5ue2', - 'ext': 'ogg', - 'title': 'Liber & Natalia Szroeder - Teraz Ty', - 'duration': 203, - 'uploader_id': 'jolka85', - 'description': 'md5:2d2b6340f9188c8c4cd891580e481096', + 'id': '01xBFabGXu6', + 'ext': 'mp3', + 'title': 'James Horner - Into The Na\'vi World [Bonus]', + 'description': 'md5:30a70718b2cd9df3120fce4445b0263b', + 'duration': 95, + 'uploader_id': 'vexling', }, }] @@ -45,7 +48,10 @@ class WrzutaIE(InfoExtractor): typ = mobj.group('typ') uploader = mobj.group('uploader') - webpage = self._download_webpage(url, video_id) + webpage, urlh = self._download_webpage_handle(url, video_id) + + if urlh.geturl() == 'http://www.wrzuta.pl/': + raise ExtractorError('Video removed', expected=True) quality = qualities(['SD', 'MQ', 'HQ', 'HD']) @@ -82,10 +88,6 @@ class WrzutaIE(InfoExtractor): } -_ENTRY_PATTERN = r'' -_PLAYLIST_SIZE_PATTERN = r'
[0-9]+/([0-9]+)
' - - class WrzutaPlaylistIE(InfoExtractor): """ this class covers extraction of wrzuta playlist entries @@ -101,10 +103,7 @@ class WrzutaPlaylistIE(InfoExtractor): """ IE_NAME = 'wrzuta.pl:playlist' - - _VALID_URL = r'https?://(?P[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \ - '(?P[0-9a-zA-Z]+)/.*' - + _VALID_URL = r'https?://(?P[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P[0-9a-zA-Z]+)' _TESTS = [{ 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', 'playlist_mincount': 14, @@ -119,6 +118,9 @@ class WrzutaPlaylistIE(InfoExtractor): 'id': '6Nj3wQHx756', 'title': 'Lipiec - Lato 2015 Muzyka Świata', }, + }, { + 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR', + 'only_matching': True, }] def _real_extract(self, url): @@ -126,31 +128,31 @@ class WrzutaPlaylistIE(InfoExtractor): playlist_id = mobj.group('id') uploader = mobj.group('uploader') - entries = [] - webpage = self._download_webpage(url, playlist_id) - playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') - playlist_size = int(playlist_size) if playlist_size else 0 + playlist_size = int_or_none(self._html_search_regex( + (r']+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)', + r']+class=["\']all-counter["\'][^>]*>(.+?)'), + webpage, 'playlist size', default=None)) - playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) + playlist_title = remove_start( + self._og_search_title(webpage), 'Playlista: ') + entries = [] if playlist_size: - entries = list(map( - lambda entry_url: self.url_result(entry_url), - re.findall(_ENTRY_PATTERN, webpage) - )) - + entries = [ + self.url_result(entry_url) + for _, entry_url in re.findall( + r']+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page', + webpage)] if playlist_size > len(entries): playlist_content = self._download_json( - 'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( - uploader_id=uploader, - playlist_id=playlist_id, - ), + 'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id), playlist_id, - 'Downloading playlist content as JSON metadata', - 'Unable to download playlist content as JSON metadata', - ) - entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] + 'Downloading playlist JSON', + 'Unable to download playlist JSON') + entries.extend([ + self.url_result(entry['filelink']) + for entry in playlist_content.get('files', []) if entry.get('filelink')]) return self.playlist_result(entries, playlist_id, playlist_title)