[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / globo.py
index 81d6d36d325eb0af5c03a6e93536c34ae2da5bce..60d842d3a81e801fc9cd11c721c8d14fc9591fb9 100644 (file)
@@ -23,7 +23,6 @@ from ..utils import (
 
 class GloboIE(InfoExtractor):
     _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
-    _LOGGED_IN = False
     _NETRC_MACHINE = 'globo'
     _TESTS = [{
         'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
@@ -68,15 +67,12 @@ class GloboIE(InfoExtractor):
     }]
 
     def _real_initialize(self):
-        if self._LOGGED_IN:
-            return
-
         email, password = self._get_login_info()
         if email is None:
             return
 
         try:
-            self._download_json(
+            glb_id = (self._download_json(
                 'https://login.globo.com/api/authentication', None, data=json.dumps({
                     'payload': {
                         'email': email,
@@ -85,13 +81,14 @@ class GloboIE(InfoExtractor):
                     },
                 }).encode(), headers={
                     'Content-Type': 'application/json; charset=utf-8',
-                })
+                }) or {}).get('glbId')
+            if glb_id:
+                self._set_cookie('.globo.com', 'GLBID', glb_id)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                 resp = self._parse_json(e.cause.read(), None)
                 raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
             raise
-        self._LOGGED_IN = True
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -99,21 +96,31 @@ class GloboIE(InfoExtractor):
         video = self._download_json(
             'http://api.globovideos.com/videos/%s/playlist' % video_id,
             video_id)['videos'][0]
+        if video.get('encrypted') is True:
+            raise ExtractorError('This video is DRM protected.', expected=True)
 
         title = video['title']
 
         formats = []
+        subtitles = {}
         for resource in video['resources']:
             resource_id = resource.get('_id')
             resource_url = resource.get('url')
-            if not resource_id or not resource_url:
+            resource_type = resource.get('type')
+            if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
+                continue
+
+            if resource_type == 'subtitle':
+                subtitles.setdefault(resource.get('language') or 'por', []).append({
+                    'url': resource_url,
+                })
                 continue
 
             security = self._download_json(
                 'http://security.video.globo.com/videos/%s/hash' % video_id,
                 video_id, 'Downloading security hash for %s' % resource_id, query={
-                    'player': 'flash',
-                    'version': '17.0.0.132',
+                    'player': 'desktop',
+                    'version': '5.19.1',
                     'resource_id': resource_id,
                 })
 
@@ -126,18 +133,23 @@ class GloboIE(InfoExtractor):
                 continue
 
             hash_code = security_hash[:2]
-            received_time = security_hash[2:12]
-            received_random = security_hash[12:22]
-            received_md5 = security_hash[22:]
-
-            sign_time = compat_str(int(received_time) + 86400)
             padding = '%010d' % random.randint(1, 10000000000)
-
-            md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
+            if hash_code in ('04', '14'):
+                received_time = security_hash[3:13]
+                received_md5 = security_hash[24:]
+                hash_prefix = security_hash[:23]
+            elif hash_code in ('02', '12', '03', '13'):
+                received_time = security_hash[2:12]
+                received_md5 = security_hash[22:]
+                padding += '1'
+                hash_prefix = '05' + security_hash[:22]
+
+            padded_sign_time = compat_str(int(received_time) + 86400) + padding
+            md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
             signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
-            signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
+            signed_hash = hash_prefix + padded_sign_time + signed_md5
+            signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
 
-            signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
             if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
                 formats.extend(self._extract_m3u8_formats(
                     signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
@@ -167,7 +179,8 @@ class GloboIE(InfoExtractor):
             'duration': duration,
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'formats': formats
+            'formats': formats,
+            'subtitles': subtitles,
         }