[adobepass] add specific options for adobe pass authentication
[youtube-dl] / youtube_dl / extractor / adobepass.py
1 # -*- coding: utf-8 -*-
2 from __future__ import unicode_literals
3
4 import re
5 import time
6 import xml.etree.ElementTree as etree
7
8 from .common import InfoExtractor
9 from ..compat import compat_urlparse
10 from ..utils import (
11     unescapeHTML,
12     urlencode_postdata,
13     unified_timestamp,
14     ExtractorError,
15 )
16
17
# Login form details for each supported TV provider, keyed by the Adobe Pass
# MSO identifier. Every entry carries the provider's display name and the
# names of the username/password inputs on that provider's login form.
MSO_INFO = dict(
    (mso_id, {
        'name': display_name,
        'username_field': username_field,
        'password_field': password_field,
    })
    for mso_id, display_name, username_field, password_field in (
        ('DTV', 'DirecTV', 'username', 'password'),
        ('Rogers', 'Rogers Cable', 'UserName', 'UserPassword'),
    )
)
30
31
class AdobePassIE(InfoExtractor):
    """Base extractor providing Adobe Pass (TV provider) authentication helpers.

    Subclasses build a resource description with ``_get_mvpd_resource`` and
    pass it to ``_extract_mvpd_auth`` to run the authentication flow against
    the Adobe service endpoints.
    """

    # URL template for the Adobe service endpoints; ``%s`` is the endpoint
    # path (e.g. 'session', 'authorize', 'shortAuthorize').
    _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
    # User agent sent with the token requests (also echoed in the 'ap_z' header).
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'

    @staticmethod
    def _get_mvpd_resource(provider_id, title, guid, rating):
        """Build the RSS resource string describing one media item.

        The result is an ``<rss version="2.0">`` document with a single
        ``<channel>`` whose ``<title>`` is ``provider_id`` and whose ``<item>``
        holds ``title``, ``guid`` and a ``<media:rating scheme="urn:v-chip">``
        element containing ``rating``.

        Returns the serialized document as a text string.
        """
        channel = etree.Element('channel')
        channel_title = etree.SubElement(channel, 'title')
        channel_title.text = provider_id
        item = etree.SubElement(channel, 'item')
        resource_title = etree.SubElement(item, 'title')
        resource_title.text = title
        resource_guid = etree.SubElement(item, 'guid')
        resource_guid.text = guid
        resource_rating = etree.SubElement(item, 'media:rating')
        resource_rating.attrib = {'scheme': 'urn:v-chip'}
        resource_rating.text = rating
        # The 'media' prefix is declared on the hand-written <rss> wrapper
        # rather than registered with ElementTree.
        return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'

    def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
        """Run the Adobe Pass flow and return the 'shortAuthorize' response.

        Cached authentication/authorization tokens (stored in the downloader
        cache under section 'mvpd', key ``requestor_id``) are reused until
        they expire. When no valid authentication token is cached, the user is
        logged in through the provider selected by the 'ap_mso_id' parameter.

        Parameters:
        url          -- sent as 'redirect_url' when starting the provider login
        video_id     -- identifier passed to the download helpers
        requestor_id -- Adobe Pass requestor id; also the cache key
        resource     -- resource XML string; must contain a ``<guid>`` element
                        (see ``_get_mvpd_resource``)

        Returns the body of the 'shortAuthorize' response, or None if every
        attempt (at most 'ap_retries', default 3) ended with a
        ``<pendingLogout`` response.

        Raises ExtractorError when no provider or credentials are configured,
        or when the configured provider is not listed in MSO_INFO.
        """
        def xml_text(xml_str, tag):
            # Text between <tag> and </tag>; the tag name doubles as the
            # field name handed to _search_regex.
            return self._search_regex(
                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)

        def is_expired(token, date_ele):
            # Drop the '_GMT'/' GMT' suffix before parsing the timestamp.
            # A falsy parse result is treated as "not expired".
            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
            return token_expires and token_expires <= int(time.time())

        def post_form(form_page_res, note, data=None):
            # Submit the first <form> found on the page: hidden inputs are
            # sent as-is, with `data` (if given) overriding or adding fields.
            form_page, urlh = form_page_res
            post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
            if not re.match(r'https?://', post_url):
                # Relative action: resolve against the page's final URL.
                post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
            form_data = self._hidden_inputs(form_page)
            if data:
                form_data.update(data)
            return self._download_webpage_handle(
                post_url, video_id, note, data=urlencode_postdata(form_data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                })

        def raise_mvpd_required():
            raise ExtractorError(
                'This video is only available for users of participating TV providers. '
                'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier '
                'and --netrc to provide account credentials.', expected=True)

        mvpd_headers = {
            'ap_42': 'anonymous',
            'ap_11': 'Linux i686',
            'ap_z': self._USER_AGENT,
            'User-Agent': self._USER_AGENT,
        }

        guid = xml_text(resource, 'guid')
        retries = self._downloader.params.get('ap_retries', 3)
        if retries is None:
            # The key may be present but unset; fall back to the default
            # instead of comparing an int against None below.
            retries = 3
        count = 0
        while count < retries:
            # Reloaded on every attempt: a '<pendingLogout' response clears
            # the cache entry, which forces a fresh login on the next pass.
            requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
            authn_token = requestor_info.get('authn_token')
            if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
                authn_token = None
            if not authn_token:
                # TODO add support for other TV Providers
                mso_id = self._downloader.params.get('ap_mso_id')
                if not mso_id:
                    raise_mvpd_required()
                if mso_id not in MSO_INFO:
                    raise ExtractorError(
                        'Unsupported TV Provider "%s", use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True)
                username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
                if not username or not password:
                    raise_mvpd_required()
                mso_info = MSO_INFO[mso_id]

                provider_redirect_page_res = self._download_webpage_handle(
                    self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
                    'Downloading Provider Redirect Page', query={
                        'noflash': 'true',
                        'mso_id': mso_id,
                        'requestor_id': requestor_id,
                        'no_iframe': 'false',
                        'domain_name': 'adobe.com',
                        'redirect_url': url,
                    })
                provider_login_page_res = post_form(
                    provider_redirect_page_res, 'Downloading Provider Login Page')
                mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
                    mso_info['username_field']: username,
                    mso_info['password_field']: password,
                })
                if mso_id == 'DTV':
                    # This provider needs one additional form submission
                    # after the credentials are posted.
                    post_form(mvpd_confirm_page_res, 'Confirming Login')

                session = self._download_webpage(
                    self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
                    'Retrieving Session', data=urlencode_postdata({
                        '_method': 'GET',
                        'requestor_id': requestor_id,
                    }), headers=mvpd_headers)
                if '<pendingLogout' in session:
                    self._downloader.cache.store('mvpd', requestor_id, {})
                    count += 1
                    continue
                authn_token = unescapeHTML(xml_text(session, 'authnToken'))
                requestor_info['authn_token'] = authn_token
                self._downloader.cache.store('mvpd', requestor_id, requestor_info)

            # Authorization tokens are cached per resource guid.
            authz_token = requestor_info.get(guid)
            if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
                authz_token = None
            if not authz_token:
                authorize = self._download_webpage(
                    self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
                    'Retrieving Authorization Token', data=urlencode_postdata({
                        'resource_id': resource,
                        'requestor_id': requestor_id,
                        'authentication_token': authn_token,
                        'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
                        'userMeta': '1',
                    }), headers=mvpd_headers)
                if '<pendingLogout' in authorize:
                    self._downloader.cache.store('mvpd', requestor_id, {})
                    count += 1
                    continue
                authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
                requestor_info[guid] = authz_token
                self._downloader.cache.store('mvpd', requestor_id, requestor_info)

            mvpd_headers.update({
                'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
                'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
            })

            short_authorize = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
                video_id, 'Retrieving Media Token', data=urlencode_postdata({
                    'authz_token': authz_token,
                    'requestor_id': requestor_id,
                    'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
                    'hashed_guid': 'false',
                }), headers=mvpd_headers)
            if '<pendingLogout' in short_authorize:
                self._downloader.cache.store('mvpd', requestor_id, {})
                count += 1
                continue
            return short_authorize

        # Every attempt ended with '<pendingLogout'; no token was obtained.
        return None