[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / globo.py
index 730deda6b9a172cdddb6166223c52a574024830c..60d842d3a81e801fc9cd11c721c8d14fc9591fb9 100644 (file)
@@ -8,7 +8,10 @@ import random
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
 from ..utils import (
     ExtractorError,
     float_or_none,
@@ -20,7 +23,7 @@ from ..utils import (
 
 class GloboIE(InfoExtractor):
     _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
-    _LOGGED_IN = False
+    _NETRC_MACHINE = 'globo'
     _TESTS = [{
         'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
         'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
@@ -64,24 +67,28 @@ class GloboIE(InfoExtractor):
     }]
 
     def _real_initialize(self):
-        if self._LOGGED_IN:
-            return
-
         email, password = self._get_login_info()
         if email is None:
             return
 
-        self._download_json(
-            'https://login.globo.com/api/authentication', None, data=json.dumps({
-                'payload': {
-                    'email': email,
-                    'password': password,
-                    'serviceId': 4654,
-                },
-            }).encode(), headers={
-                'Content-Type': 'application/json; charset=utf-8',
-            })
-        self._LOGGED_IN = True
+        try:
+            glb_id = (self._download_json(
+                'https://login.globo.com/api/authentication', None, data=json.dumps({
+                    'payload': {
+                        'email': email,
+                        'password': password,
+                        'serviceId': 4654,
+                    },
+                }).encode(), headers={
+                    'Content-Type': 'application/json; charset=utf-8',
+                }) or {}).get('glbId')
+            if glb_id:
+                self._set_cookie('.globo.com', 'GLBID', glb_id)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                resp = self._parse_json(e.cause.read(), None)
+                raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
+            raise
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -89,21 +96,31 @@ class GloboIE(InfoExtractor):
         video = self._download_json(
             'http://api.globovideos.com/videos/%s/playlist' % video_id,
             video_id)['videos'][0]
+        if video.get('encrypted') is True:
+            raise ExtractorError('This video is DRM protected.', expected=True)
 
         title = video['title']
 
         formats = []
+        subtitles = {}
         for resource in video['resources']:
             resource_id = resource.get('_id')
             resource_url = resource.get('url')
-            if not resource_id or not resource_url:
+            resource_type = resource.get('type')
+            if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
+                continue
+
+            if resource_type == 'subtitle':
+                subtitles.setdefault(resource.get('language') or 'por', []).append({
+                    'url': resource_url,
+                })
                 continue
 
             security = self._download_json(
                 'http://security.video.globo.com/videos/%s/hash' % video_id,
                 video_id, 'Downloading security hash for %s' % resource_id, query={
-                    'player': 'flash',
-                    'version': '17.0.0.132',
+                    'player': 'desktop',
+                    'version': '5.19.1',
                     'resource_id': resource_id,
                 })
 
@@ -116,18 +133,23 @@ class GloboIE(InfoExtractor):
                 continue
 
             hash_code = security_hash[:2]
-            received_time = int(security_hash[2:12])
-            received_random = security_hash[12:22]
-            received_md5 = security_hash[22:]
-
-            sign_time = received_time + 86400
             padding = '%010d' % random.randint(1, 10000000000)
-
-            md5_data = (received_md5 + str(sign_time) + padding + '0xFF01DD').encode()
+            if hash_code in ('04', '14'):
+                received_time = security_hash[3:13]
+                received_md5 = security_hash[24:]
+                hash_prefix = security_hash[:23]
+            elif hash_code in ('02', '12', '03', '13'):
+                received_time = security_hash[2:12]
+                received_md5 = security_hash[22:]
+                padding += '1'
+                hash_prefix = '05' + security_hash[:22]
+
+            padded_sign_time = compat_str(int(received_time) + 86400) + padding
+            md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
             signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
-            signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
+            signed_hash = hash_prefix + padded_sign_time + signed_md5
+            signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
 
-            signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
             if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
                 formats.extend(self._extract_m3u8_formats(
                     signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
@@ -157,7 +179,8 @@ class GloboIE(InfoExtractor):
             'duration': duration,
             'uploader': uploader,
             'uploader_id': uploader_id,
-            'formats': formats
+            'formats': formats,
+            'subtitles': subtitles,
         }