[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / scrippsnetworks.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import hashlib
6 import re
7
8 from .aws import AWSIE
9 from .anvato import AnvatoIE
10 from .common import InfoExtractor
11 from ..utils import (
12     smuggle_url,
13     urlencode_postdata,
14     xpath_text,
15 )
16
17
class ScrippsNetworksWatchIE(AWSIE):
    # Extractor for watch.geniuskitchen.com player/show pages. It resolves the
    # numeric id in the page URL to an "mcpId" through a signed API call and
    # then hands the actual extraction over to AnvatoIE.
    IE_NAME = 'scrippsnetworks:watch'
    _VALID_URL = r'''(?x)
                    https?://
                        watch\.
                        (?P<site>geniuskitchen)\.com/
                        (?:
                            player\.[A-Z0-9]+\.html\#|
                            show/(?:[^/]+/){2}|
                            player/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
        'info_dict': {
            'id': '4194875',
            'ext': 'mp4',
            'title': 'Ample Hills Ice Cream Bike',
            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
            'uploader': 'ANV',
            'upload_date': '20171011',
            'timestamp': 1507698000,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [AnvatoIE.ie_key()],
    }]

    # Maps the <site> group of _VALID_URL to the brand name used in the
    # episodes API path built in _real_extract.
    _SNI_TABLE = {
        'geniuskitchen': 'genius',
    }

    # NOTE(review): not read in this class; presumably consumed by the AWSIE
    # base class when signing the API request -- confirm against .aws.
    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
    _AWS_PROXY_HOST = 'web.api.video.snidigital.com'

    # Sent as X-Amz-User-Agent on both the Cognito and the STS request.
    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'

    def _real_extract(self, url):
        """Resolve a watch page URL to an Anvato url_result.

        Steps: request an OpenID token from Cognito, trade it at STS for
        temporary credentials, query the episodes API with them to obtain
        the mcpId, and delegate to AnvatoIE with that id.
        """
        site_id, video_id = re.match(self._VALID_URL, url).group('site', 'id')

        # 1. Cognito: GetOpenIdToken for a fixed identity id.
        # self._AWS_REGION is not defined here (presumably on AWSIE).
        identity_payload = json.dumps({
            'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
        }).encode('utf-8')
        cognito_headers = {
            'Accept': '*/*',
            'Content-Type': 'application/x-amz-json-1.1',
            'Referer': url,
            # Hex SHA-256 digest of the exact request body bytes.
            'X-Amz-Content-Sha256': hashlib.sha256(identity_payload).hexdigest(),
            'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
            'X-Amz-User-Agent': self._AWS_USER_AGENT,
        }
        cognito_response = self._download_json(
            'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION,
            video_id, data=identity_payload, headers=cognito_headers)
        token = cognito_response['Token']

        # 2. STS: AssumeRoleWithWebIdentity using the token from step 1.
        sts_form = urlencode_postdata({
            'Action': 'AssumeRoleWithWebIdentity',
            'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
            'RoleSessionName': 'web-identity',
            'Version': '2011-06-15',
            'WebIdentityToken': token,
        })
        sts_headers = {
            'Referer': url,
            'X-Amz-User-Agent': self._AWS_USER_AGENT,
            'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
        }
        sts = self._download_xml(
            'https://sts.amazonaws.com/', video_id,
            data=sts_form, headers=sts_headers)

        # 3. Episodes API: build the request description, pulling each
        # credential out of the STS XML (fatal if an element is missing).
        api_request = {
            'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
        }
        for option, element in (
                ('access_key', 'AccessKeyId'),
                ('secret_key', 'SecretAccessKey'),
                ('session_token', 'SessionToken')):
            api_request[option] = xpath_text(
                sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % element,
                fatal=True)

        episode_info = self._aws_execute_api(api_request, video_id)
        mcp_id = episode_info['results'][0]['mcpId']

        # 4. Delegate to Anvato; the smuggled data carries the US geo hint.
        anvato_url = 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id
        return self.url_result(
            smuggle_url(anvato_url, {'geo_countries': ['US']}),
            AnvatoIE.ie_key(), video_id=mcp_id)
106
107
class ScrippsNetworksIE(InfoExtractor):
    # Extractor for /videos/<slug>-<id> pages on the listed network sites.
    # It builds a link.theplatform.com URL from the site and the id, then
    # delegates to the 'ThePlatform' extractor.
    _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
        'info_dict': {
            'id': '0260338',
            'ext': 'mp4',
            'title': 'The Best of the Best',
            'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
            'timestamp': 1475678834,
            'upload_date': '20161005',
            'uploader': 'SCNI-SCND',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
        'only_matching': True,
    }, {
        'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
        'only_matching': True,
    }, {
        'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
        'only_matching': True,
    }, {
        'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
        'only_matching': True,
    }, {
        'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
        'only_matching': True,
    }]
    # Per-site number substituted into the %d slot of _TP_TEMPL
    # (presumably the site's thePlatform account id -- confirm).
    _ACCOUNT_MAP = {
        'cookingchanneltv': 2433005105,
        'discovery': 2706091867,
        'diynetwork': 2433004575,
        'foodnetwork': 2433005105,
        'hgtv': 2433004575,
        'travelchannel': 2433005739,
    }
    # Filled with (account number, video id from the page URL).
    _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'

    def _real_extract(self, url):
        """Return a url_result pointing the 'ThePlatform' extractor at the video."""
        mobj = re.match(self._VALID_URL, url)
        site, guid = mobj.group('site', 'id')
        tp_url = self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid)
        return self.url_result(
            smuggle_url(tp_url, {'force_smil_url': True}),
            'ThePlatform', guid)
151             self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
152             {'force_smil_url': True}), 'ThePlatform', guid)