X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fglobo.py;h=60d842d3a81e801fc9cd11c721c8d14fc9591fb9;hb=HEAD;hp=730deda6b9a172cdddb6166223c52a574024830c;hpb=db2058f63e64ff59ffad0e1e8ad5e18d18d3da71;p=youtube-dl diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 730deda6b..60d842d3a 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -8,7 +8,10 @@ import random import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( ExtractorError, float_or_none, @@ -20,7 +23,7 @@ from ..utils import ( class GloboIE(InfoExtractor): _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P\d{7,})' - _LOGGED_IN = False + _NETRC_MACHINE = 'globo' _TESTS = [{ 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', 'md5': 'b3ccc801f75cd04a914d51dadb83a78d', @@ -64,24 +67,28 @@ class GloboIE(InfoExtractor): }] def _real_initialize(self): - if self._LOGGED_IN: - return - email, password = self._get_login_info() if email is None: return - self._download_json( - 'https://login.globo.com/api/authentication', None, data=json.dumps({ - 'payload': { - 'email': email, - 'password': password, - 'serviceId': 4654, - }, - }).encode(), headers={ - 'Content-Type': 'application/json; charset=utf-8', - }) - self._LOGGED_IN = True + try: + glb_id = (self._download_json( + 'https://login.globo.com/api/authentication', None, data=json.dumps({ + 'payload': { + 'email': email, + 'password': password, + 'serviceId': 4654, + }, + }).encode(), headers={ + 'Content-Type': 'application/json; charset=utf-8', + }) or {}).get('glbId') + if glb_id: + self._set_cookie('.globo.com', 'GLBID', glb_id) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + resp = self._parse_json(e.cause.read(), None) + raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True) + raise def _real_extract(self, url): video_id = self._match_id(url) @@ -89,21 +96,31 @@ class GloboIE(InfoExtractor): video = self._download_json( 'http://api.globovideos.com/videos/%s/playlist' % video_id, video_id)['videos'][0] + if video.get('encrypted') is True: + raise ExtractorError('This video is DRM protected.', expected=True) title = video['title'] formats = [] + subtitles = {} for resource in video['resources']: resource_id = resource.get('_id') resource_url = resource.get('url') - if not resource_id or not resource_url: + resource_type = resource.get('type') + if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'): + continue + + if resource_type == 'subtitle': + subtitles.setdefault(resource.get('language') or 'por', []).append({ + 'url': resource_url, + }) continue security = self._download_json( 'http://security.video.globo.com/videos/%s/hash' % video_id, video_id, 'Downloading security hash for %s' % resource_id, query={ - 'player': 'flash', - 'version': '17.0.0.132', + 'player': 'desktop', + 'version': '5.19.1', 'resource_id': resource_id, }) @@ -116,18 +133,23 @@ class GloboIE(InfoExtractor): continue hash_code = security_hash[:2] - received_time = int(security_hash[2:12]) - received_random = security_hash[12:22] - received_md5 = security_hash[22:] - - sign_time = received_time + 86400 padding = '%010d' % random.randint(1, 10000000000) - - md5_data = (received_md5 + str(sign_time) + padding + '0xFF01DD').encode() + if hash_code in ('04', '14'): + received_time = security_hash[3:13] + received_md5 = security_hash[24:] + hash_prefix = security_hash[:23] + elif hash_code in ('02', '12', '03', '13'): + received_time = security_hash[2:12] + received_md5 = security_hash[22:] + padding += '1' + hash_prefix = '05' + security_hash[:22] + + padded_sign_time = compat_str(int(received_time) + 86400) + padding + md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode() signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') - signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5 + signed_hash = hash_prefix + padded_sign_time + signed_md5 + signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') - signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): formats.extend(self._extract_m3u8_formats( signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', @@ -157,7 +179,8 @@ class GloboIE(InfoExtractor): 'duration': duration, 'uploader': uploader, 'uploader_id': uploader_id, - 'formats': formats + 'formats': formats, + 'subtitles': subtitles, }