X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvevo.py;h=c17094f8193f7678cc3d0a912c3d970f38e6bf7c;hb=80240b347e793672909bad21e7781d8b829c6539;hp=ee47c30bab9bd37a5d8e75109f29108864db68d0;hpb=0f2a2ba14b2cbf4bd8bec0ce32f8d7c27a733c68;p=youtube-dl diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index ee47c30ba..c17094f81 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,19 +4,22 @@ import re import xml.etree.ElementTree from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( - compat_HTTPError, ExtractorError, + int_or_none, ) class VevoIE(InfoExtractor): """ Accepts urls from vevo.com or in the format 'vevo:{id}' - (currently used by MTVIE) + (currently used by MTVIE and MySpaceIE) """ _VALID_URL = r'''(?x) - (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?| + (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| vevo:) @@ -24,7 +27,7 @@ class VevoIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', - "md5": "06bea460acb744eab74a9d7dcb4bfd61", + "md5": "95ee28ee45e70130e3ab02b0f579ae23", 'info_dict': { 'id': 'GB1101300280', 'ext': 'mp4', @@ -40,7 +43,7 @@ class VevoIE(InfoExtractor): }, { 'note': 'v3 SMIL format', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', - 'md5': '893ec0e0d4426a1d96c01de8f2bdff58', + 'md5': 'f6ab09b034f8c22969020b042e5ac7fc', 'info_dict': { 'id': 'USUV71302923', 'ext': 'mp4', @@ -69,6 +72,21 @@ class VevoIE(InfoExtractor): }] _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' + def _real_initialize(self): + req = compat_urllib_request.Request( + 'http://www.vevo.com/auth', data=b'') + webpage = self._download_webpage( + req, None, + note='Retrieving oauth token', + errnote='Unable to retrieve oauth token', + fatal=False) + if webpage is False: + self._oauth_token = None + else: + self._oauth_token = self._search_regex( + r'access_token":\s*"([^"]+)"', + webpage, 'access token', fatal=False) + def _formats_from_json(self, video_info): last_version = {'version': -1} for version in video_info['videoVersions']: @@ -129,12 +147,38 @@ class VevoIE(InfoExtractor): }) return formats + def _download_api_formats(self, video_id): + if not self._oauth_token: + self._downloader.report_warning( + 'No oauth token available, skipping API HLS download') + return [] + + api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % ( + video_id, self._oauth_token) + api_data = self._download_json( + api_url, video_id, + note='Downloading HLS formats', + errnote='Failed to download HLS format list', fatal=False) + if api_data is None: + return [] + + m3u8_url = api_data[0]['url'] + return self._extract_m3u8_formats( + m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4', + preference=0) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - video_info = self._download_json(json_url, video_id)['video'] + response = self._download_json(json_url, video_id) + video_info = response['video'] + + if not video_info: + if 'statusMessage' in response: + raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True) + raise ExtractorError('Unable to extract videos') formats = self._formats_from_json(video_info) @@ -146,33 +190,32 @@ class VevoIE(InfoExtractor): else: age_limit = None + # Download via HLS API + formats.extend(self._download_api_formats(video_id)) + # Download SMIL smil_blocks = sorted(( f for f in video_info['videoVersions'] if f['sourceType'] == 13), key=lambda f: f['version']) - smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( self._SMIL_BASE_URL, video_id, video_id.lower()) if smil_blocks: smil_url_m = self._search_regex( r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL', - fatal=False) + default=None) if smil_url_m is not None: smil_url = smil_url_m - - try: - smil_xml = self._download_webpage(smil_url, video_id, - 'Downloading SMIL info') - formats.extend(self._formats_from_smil(smil_xml)) - except ExtractorError as ee: - if not isinstance(ee.cause, compat_HTTPError): - raise - self._downloader.report_warning( - 'Cannot download SMIL information, falling back to JSON ..') - - timestamp_ms = int(self._search_regex( - r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date')) + if smil_url: + smil_xml = self._download_webpage( + smil_url, video_id, 'Downloading SMIL info', fatal=False) + if smil_xml: + formats.extend(self._formats_from_smil(smil_xml)) + + self._sort_formats(formats) + timestamp_ms = int_or_none(self._search_regex( + r'/Date\((\d+)\)/', + video_info['launchDate'], 'launch date', fatal=False)) return { 'id': video_id,