[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / aws.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import datetime
5 import hashlib
6 import hmac
7
8 from .common import InfoExtractor
9 from ..compat import compat_urllib_parse_urlencode
10
11
class AWSIE(InfoExtractor):
    """Base extractor that signs GET requests with AWS Signature Version 4.

    The signing scope uses the 'execute-api' service in ``_AWS_REGION``.
    ``_AWS_PROXY_HOST`` and ``_AWS_API_KEY`` are read by
    ``_aws_execute_api`` but are not defined in this class, so they have to
    be supplied elsewhere (presumably by subclasses).
    """
    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
    _AWS_REGION = 'us-east-1'

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        """Perform a SigV4-signed GET request and return the downloaded JSON.

        aws_dict -- mapping with the keys 'uri' (request path),
                    'access_key' and 'secret_key'; an optional
                    'session_token' is sent as X-Amz-Security-Token
        video_id -- passed through to _download_json
        query    -- optional query parameters (mapping or sequence of
                    key/value pairs) appended to the URL and included in
                    the signature

        Returns whatever self._download_json returns for the signed URL.
        """
        query = query or {}
        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        # The credential scope only uses the YYYYMMDD part of the timestamp
        date = amz_date[:8]
        headers = {
            'Accept': 'application/json',
            'Host': self._AWS_PROXY_HOST,
            'X-Amz-Date': amz_date,
            'X-Api-Key': self._AWS_API_KEY
        }
        session_token = aws_dict.get('session_token')
        if session_token:
            headers['X-Amz-Security-Token'] = session_token

        def aws_hash(s):
            return hashlib.sha256(s.encode('utf-8')).hexdigest()

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        # The canonical query string must be sorted by parameter name and
        # must encode spaces as %20. urlencode emits a literal '+' only for
        # spaces (a real '+' becomes %2B), so the replacement below is safe.
        query_pairs = query.items() if hasattr(query, 'items') else query
        canonical_querystring = compat_urllib_parse_urlencode(
            sorted(query_pairs)).replace('+', '%20')
        sorted_headers = sorted(headers.items())
        canonical_headers = ''.join(
            '%s:%s\n' % (header_name.lower(), header_value)
            for header_name, header_value in sorted_headers)
        signed_headers = ';'.join(
            header_name.lower() for header_name, _ in sorted_headers)
        canonical_request = '\n'.join([
            'GET',
            aws_dict['uri'],
            canonical_querystring,
            canonical_headers,
            signed_headers,
            # Hash of the (empty) request payload
            aws_hash('')
        ])

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
        credential_scope = '/'.join(credential_scope_list)
        string_to_sign = '\n'.join([
            self._AWS_ALGORITHM,
            amz_date,
            credential_scope,
            aws_hash(canonical_request),
        ])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        def aws_hmac(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        def aws_hmac_digest(key, msg):
            return aws_hmac(key, msg).digest()

        def aws_hmac_hexdigest(key, msg):
            return aws_hmac(key, msg).hexdigest()

        # Derive the signing key by chaining an HMAC over each scope element
        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
        for value in credential_scope_list:
            k_signing = aws_hmac_digest(k_signing, value)

        signature = aws_hmac_hexdigest(k_signing, string_to_sign)

        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
        headers['Authorization'] = ', '.join([
            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
            'SignedHeaders=%s' % signed_headers,
            'Signature=%s' % signature,
        ])

        # The request must carry exactly the query string that was signed
        url_query = '?' + canonical_querystring if canonical_querystring else ''
        return self._download_json(
            'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], url_query),
            video_id, headers=headers)