[adobepass] add an option to specify mso_id and support for ROGERS TV Provider (closes...
[youtube-dl] / youtube_dl / extractor / adobepass.py
1 # -*- coding: utf-8 -*-
2 from __future__ import unicode_literals
3
4 import re
5 import time
6 import xml.etree.ElementTree as etree
7
8 from .common import InfoExtractor
9 from ..compat import compat_urlparse
10 from ..utils import (
11     unescapeHTML,
12     urlencode_postdata,
13     unified_timestamp,
14     ExtractorError,
15 )
16
17
class AdobePassIE(InfoExtractor):
    """InfoExtractor providing helpers for Adobe Pass (TV provider) auth.

    The class offers two helpers: ``_get_mvpd_resource`` builds the RSS
    resource descriptor for a video, and ``_extract_mvpd_auth`` runs the
    authentication / authorization requests against the Adobe service
    provider endpoints and returns the final ``shortAuthorize`` response.
    """
    # Endpoint template; filled with the service name ('session', 'authorize', ...).
    _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'

    @staticmethod
    def _get_mvpd_resource(provider_id, title, guid, rating):
        """Return the RSS document (as text) describing a protected resource.

        The document has a ``<channel>`` whose title is ``provider_id`` and a
        single ``<item>`` carrying ``title``, ``guid`` and a ``media:rating``
        element (scheme ``urn:v-chip``) with ``rating`` as its text.
        """
        channel = etree.Element('channel')
        etree.SubElement(channel, 'title').text = provider_id

        item = etree.SubElement(channel, 'item')
        etree.SubElement(item, 'title').text = title
        etree.SubElement(item, 'guid').text = guid

        resource_rating = etree.SubElement(item, 'media:rating')
        resource_rating.set('scheme', 'urn:v-chip')
        resource_rating.text = rating

        # The <rss> wrapper is added by hand so that the "media" namespace
        # prefix used by the rating element above is declared.
        return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'

    def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
        """Authenticate and authorize against Adobe Pass; return the media token response.

        ``url`` is sent as the ``redirect_url`` of the login request,
        ``video_id`` is only passed to the download helpers, ``requestor_id``
        identifies the requestor (and is the cache key) and ``resource`` is an
        XML string containing a ``<guid>`` element (see ``_get_mvpd_resource``).

        Authentication and authorization tokens are cached under the
        ``'mvpd'`` cache section, keyed by ``requestor_id``; the authorization
        token is stored per resource guid. Cached tokens are reused until
        their expiry element says they have expired.

        Returns the body of the ``shortAuthorize`` response.

        Raises ExtractorError (expected) when a login is needed but the
        ``ap_mso_id`` parameter or the netrc credentials for it are missing.
        """
        def xml_text(xml_str, tag):
            # Text between <tag> and </tag>; tags with attributes are not matched.
            return self._search_regex(
                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)

        def is_expired(token, date_ele):
            # The "GMT" marker is stripped before the date is parsed. A falsy
            # parse result is treated as "not expired".
            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
            return token_expires and token_expires <= int(time.time())

        def raise_mvpd_required():
            raise ExtractorError(
                'This video is only available for users of participating TV providers. '
                'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier '
                'and --netrc to provide account credentials.', expected=True)

        def restart_after_logout():
            # A '<pendingLogout' marker in a response: drop everything cached
            # for this requestor and run the whole flow again from scratch.
            # NOTE(review): there is no retry limit, so a service that keeps
            # answering with pendingLogout recurses until the interpreter's
            # recursion limit is hit -- consider bounding this.
            self._downloader.cache.store('mvpd', requestor_id, {})
            return self._extract_mvpd_auth(url, video_id, requestor_id, resource)

        mvpd_headers = {
            'ap_42': 'anonymous',
            'ap_11': 'Linux i686',
            'ap_z': self._USER_AGENT,
            'User-Agent': self._USER_AGENT,
        }

        guid = xml_text(resource, 'guid')
        requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
        authn_token = requestor_info.get('authn_token')
        if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
            authn_token = None
        if not authn_token:
            # No usable cached authentication token: log in at the provider.
            # TODO add support for other TV Providers
            mso_id = self._downloader.params.get('ap_mso_id')
            if not mso_id:
                raise_mvpd_required()
            username, password = self._get_netrc_login_info(mso_id)
            if not username or not password:
                raise_mvpd_required()

            def post_form(form_page_res, note, data=None):
                # Submit the first <form> of a downloaded page: its hidden
                # inputs plus the optional extra fields in ``data``.
                form_page, urlh = form_page_res
                post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
                if not re.match(r'https?://', post_url):
                    # Relative action: resolve against the page's final URL.
                    post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
                form_data = self._hidden_inputs(form_page)
                if data:
                    form_data.update(data)
                return self._download_webpage_handle(
                    post_url, video_id, note, data=urlencode_postdata(form_data), headers={
                        'Content-Type': 'application/x-www-form-urlencoded',
                    })

            provider_redirect_page_res = self._download_webpage_handle(
                self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
                'Downloading Provider Redirect Page', query={
                    'noflash': 'true',
                    'mso_id': mso_id,
                    'requestor_id': requestor_id,
                    'no_iframe': 'false',
                    'domain_name': 'adobe.com',
                    'redirect_url': url,
                })
            provider_login_page_res = post_form(
                provider_redirect_page_res, 'Downloading Provider Login Page')

            # Credential field names differ per provider.
            # NOTE(review): for any other mso_id the login form is posted
            # without credentials, which presumably fails later on.
            login_data = {}
            if mso_id == 'DTV':
                login_data = {
                    'username': username,
                    'password': password,
                }
            elif mso_id == 'Rogers':
                login_data = {
                    'UserName': username,
                    'UserPassword': password,
                }
            mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data)
            if mso_id == 'DTV':
                # DTV gets one more form submission after the login.
                post_form(mvpd_confirm_page_res, 'Confirming Login')

            session = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
                'Retrieving Session', data=urlencode_postdata({
                    '_method': 'GET',
                    'requestor_id': requestor_id,
                }), headers=mvpd_headers)
            if '<pendingLogout' in session:
                return restart_after_logout()
            authn_token = unescapeHTML(xml_text(session, 'authnToken'))
            requestor_info['authn_token'] = authn_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        # The authorization token is cached per resource guid.
        authz_token = requestor_info.get(guid)
        if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
            authz_token = None
        if not authz_token:
            authorize = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
                'Retrieving Authorization Token', data=urlencode_postdata({
                    'resource_id': resource,
                    'requestor_id': requestor_id,
                    'authentication_token': authn_token,
                    'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
                    'userMeta': '1',
                }), headers=mvpd_headers)
            if '<pendingLogout' in authorize:
                return restart_after_logout()
            authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
            requestor_info[guid] = authz_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        # The final request additionally carries two values taken from the
        # authentication token.
        mvpd_headers.update({
            'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
            'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
        })

        short_authorize = self._download_webpage(
            self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
            video_id, 'Retrieving Media Token', data=urlencode_postdata({
                'authz_token': authz_token,
                'requestor_id': requestor_id,
                'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
                'hashed_guid': 'false',
            }), headers=mvpd_headers)
        if '<pendingLogout' in short_authorize:
            return restart_after_logout()
        return short_authorize