[adobepass] check for authz_token expiration(#10527)
[youtube-dl] / youtube_dl / extractor / adobepass.py
1 # -*- coding: utf-8 -*-
2 from __future__ import unicode_literals
3
4 import re
5 import time
6 import xml.etree.ElementTree as etree
7
8 from .common import InfoExtractor
9 from ..utils import (
10     unescapeHTML,
11     urlencode_postdata,
12     unified_timestamp,
13 )
14
15
class AdobePassIE(InfoExtractor):
    """Shared helpers for extractors that authenticate through Adobe Pass.

    Provides construction of the RSS resource descriptor and the token
    exchange against the ``sp.auth.adobe.com`` service endpoints.
    """
    _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
    # Upper bound on complete authentication attempts.  An attempt is repeated
    # only when the service answers with '<pendingLogout'; without a bound a
    # backend that keeps answering that way would make us retry forever.
    _MVPD_AUTH_ATTEMPTS = 3

    @staticmethod
    def _get_mvpd_resource(provider_id, title, guid, rating):
        """Build the RSS resource document describing a single media item.

        The document is a ``<channel>`` (titled ``provider_id``) holding one
        ``<item>`` with ``<title>``, ``<guid>`` and a ``<media:rating>``
        element using the ``urn:v-chip`` scheme, wrapped in an ``<rss>`` root
        that declares the ``media`` namespace.

        Returns the serialized document as a text string.
        """
        channel = etree.Element('channel')
        etree.SubElement(channel, 'title').text = provider_id
        item = etree.SubElement(channel, 'item')
        etree.SubElement(item, 'title').text = title
        etree.SubElement(item, 'guid').text = guid
        # The attribute dict is handed to SubElement directly instead of
        # overwriting ``.attrib`` afterwards; the serialized output is the same.
        rating_element = etree.SubElement(
            item, 'media:rating', {'scheme': 'urn:v-chip'})
        rating_element.text = rating
        # The 'media:' prefix is written literally, so the namespace has to be
        # declared by hand on the wrapping <rss> element.
        return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'

    def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
        """Return the ``shortAuthorize`` response (media token) for ``resource``.

        ``url`` is passed to the service as the login ``redirect_url``,
        ``video_id`` is only used for progress/error reporting of the
        downloads, ``requestor_id`` identifies the requestor towards the
        service and is also the cache key, and ``resource`` is the XML
        resource document (it must contain a ``<guid>`` element).

        Whenever the service answers with ``<pendingLogout`` the cached tokens
        for ``requestor_id`` are discarded and the whole exchange is started
        over, at most ``_MVPD_AUTH_ATTEMPTS`` times in total.

        Returns an empty string when no login credentials are available or
        when every attempt ended in ``<pendingLogout``.
        """
        for _ in range(self._MVPD_AUTH_ATTEMPTS):
            media_token = self._attempt_mvpd_auth(
                url, video_id, requestor_id, resource)
            # None is the helper's "cache was reset, start over" signal; any
            # other value (including '') is a final answer.
            if media_token is not None:
                return media_token
        # Same "no token" value as the missing-credentials case.
        return ''

    def _attempt_mvpd_auth(self, url, video_id, requestor_id, resource):
        """Run one authentication/authorization pass for ``resource``.

        Returns the ``shortAuthorize`` response on success, ``''`` when no
        login credentials are available, or ``None`` after clearing the cached
        tokens because the service reported ``<pendingLogout``.
        """
        def xml_text(xml_str, tag):
            # Text content of the first <tag>...</tag> pair found in xml_str.
            return self._search_regex(
                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)

        def is_expired(token, date_ele):
            # The date inside the token may carry a '_GMT' / ' GMT' marker,
            # which is stripped before parsing.  A date that cannot be parsed
            # yields a falsy result, i.e. the token is treated as still valid.
            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
            return token_expires and token_expires <= int(time.time())

        def reset_cache():
            # Drop every cached token for this requestor.
            self._downloader.cache.store('mvpd', requestor_id, {})

        # Built per attempt: the dict is extended with token-specific entries
        # further down and those must not leak into a later attempt.
        mvpd_headers = {
            'ap_42': 'anonymous',
            'ap_11': 'Linux i686',
            'ap_z': self._USER_AGENT,
            'User-Agent': self._USER_AGENT,
        }

        guid = xml_text(resource, 'guid')
        # Cached layout: 'authn_token' -> authentication token,
        # <guid> -> authorization token for that resource.
        requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
        authn_token = requestor_info.get('authn_token')
        if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
            authn_token = None
        if not authn_token:
            # TODO add support for other TV Providers
            mso_id = 'DTV'
            username, password = self._get_netrc_login_info(mso_id)
            if not username or not password:
                return ''

            def post_form(form_page, note, data=None):
                # Submit the first <form> of form_page to its action URL.
                # Without explicit data the form's hidden inputs are posted
                # back unchanged.
                post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
                return self._download_webpage(
                    post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
                        'Content-Type': 'application/x-www-form-urlencoded',
                    })

            # Login is a chain of HTML forms: redirect page -> provider login
            # page -> confirmation page.
            provider_redirect_page = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
                'Downloading Provider Redirect Page', query={
                    'noflash': 'true',
                    'mso_id': mso_id,
                    'requestor_id': requestor_id,
                    'no_iframe': 'false',
                    'domain_name': 'adobe.com',
                    'redirect_url': url,
                })
            provider_login_page = post_form(
                provider_redirect_page, 'Downloading Provider Login Page')
            mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
                'username': username,
                'password': password,
            })
            post_form(mvpd_confirm_page, 'Confirming Login')

            session = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
                'Retrieving Session', data=urlencode_postdata({
                    '_method': 'GET',
                    'requestor_id': requestor_id,
                }), headers=mvpd_headers)
            if '<pendingLogout' in session:
                reset_cache()
                return None
            authn_token = unescapeHTML(xml_text(session, 'authnToken'))
            requestor_info['authn_token'] = authn_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        authz_token = requestor_info.get(guid)
        if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
            authz_token = None
        if not authz_token:
            authorize = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
                'Retrieving Authorization Token', data=urlencode_postdata({
                    'resource_id': resource,
                    'requestor_id': requestor_id,
                    'authentication_token': authn_token,
                    'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
                    'userMeta': '1',
                }), headers=mvpd_headers)
            if '<pendingLogout' in authorize:
                reset_cache()
                return None
            authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
            requestor_info[guid] = authz_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        # The media token request additionally carries two values taken from
        # the authentication token as headers.
        mvpd_headers.update({
            'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
            'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
        })

        short_authorize = self._download_webpage(
            self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
            video_id, 'Retrieving Media Token', data=urlencode_postdata({
                'authz_token': authz_token,
                'requestor_id': requestor_id,
                'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
                'hashed_guid': 'false',
            }), headers=mvpd_headers)
        if '<pendingLogout' in short_authorize:
            reset_cache()
            return None
        return short_authorize