[animeondemand] Add test
[youtube-dl] / youtube_dl / extractor / animeondemand.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..compat import compat_urlparse
7 from ..utils import (
8     determine_ext,
9     encode_dict,
10     ExtractorError,
11     sanitized_Request,
12     urlencode_postdata,
13 )
14
15
class AnimeOnDemandIE(InfoExtractor):
    """Extractor for anime-on-demand.de anime pages.

    An anime page is returned as a playlist: each episode box on the page
    may contribute a full video (resolved through an HLS playlist JSON)
    and/or a teaser clip.
    """
    _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
    _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
    _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
    _NETRC_MACHINE = 'animeondemand'
    _TESTS = [{
        'url': 'https://www.anime-on-demand.de/anime/161',
        'info_dict': {
            'id': '161',
            'title': 'Grimgar, Ashes and Illusions (OmU)',
            'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
        },
        'playlist_mincount': 4,
    }, {
        # Film wording is used instead of Episode
        'url': 'https://www.anime-on-demand.de/anime/39',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with credentials from options/.netrc; no-op when absent.

        Raises:
            ExtractorError: if the site rejects the credentials (with the
                site-provided error message when one can be scraped).
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        login_form = self._form_hidden_inputs('new_user', login_page)

        login_form.update({
            'user[login]': username,
            'user[password]': password,
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')

        # The form action may be relative; resolve it against the login URL.
        if not post_url.startswith('http'):
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        request = sanitized_Request(
            post_url, urlencode_postdata(encode_dict(login_form)))
        request.add_header('Referer', self._LOGIN_URL)

        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        # A successful login leaves a logout link in the returned page.
        if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
            error = self._search_regex(
                r'<p class="alert alert-danger">(.+?)</p>',
                response, 'error', default=None)
            if error:
                raise ExtractorError('Unable to login: %s' % error, expected=True)
            raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        anime_id = self._match_id(url)

        webpage = self._download_webpage(url, anime_id)

        # Episode playlists are only exposed once the HTML5 beta is enabled
        # for the account; opt in and re-fetch the page when necessary.
        if 'data-playlist=' not in webpage:
            self._download_webpage(
                self._APPLY_HTML5_URL, anime_id,
                'Activating HTML5 beta', 'Unable to apply HTML5 beta')
            webpage = self._download_webpage(url, anime_id)

        # Token is required for the XHR playlist requests below.
        csrf_token = self._html_search_meta(
            'csrf-token', webpage, 'csrf token', fatal=True)

        anime_title = self._html_search_regex(
            r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
            webpage, 'anime name')
        anime_description = self._html_search_regex(
            r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
            webpage, 'anime description', default=None)

        entries = []

        for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
            m = re.search(
                r'class="episodebox-title"[^>]+title="(?:Episode|Film)\s*(?P<number>\d+)\s*-\s*(?P<title>.+?)"', episode_html)
            if not m:
                continue

            episode_number = int(m.group('number'))
            episode_title = m.group('title')
            video_id = 'episode-%d' % episode_number

            common_info = {
                'id': video_id,
                'series': anime_title,
                'episode': episode_title,
                'episode_number': episode_number,
            }

            formats = []
            # Pre-initialize so the `if formats:` branch below is safe even
            # when the playlist JSON has an unexpected shape.
            title = description = None

            playlist_url = self._search_regex(
                r'data-playlist=(["\'])(?P<url>.+?)\1',
                episode_html, 'data playlist', default=None, group='url')
            if playlist_url:
                request = sanitized_Request(
                    compat_urlparse.urljoin(url, playlist_url),
                    headers={
                        'X-Requested-With': 'XMLHttpRequest',
                        'X-CSRF-Token': csrf_token,
                        'Referer': url,
                        'Accept': 'application/json, text/javascript, */*; q=0.01',
                    })

                playlist = self._download_json(
                    request, video_id, 'Downloading playlist JSON', fatal=False)
                if playlist:
                    # fatal=False above: guard against a missing/empty
                    # 'playlist' key instead of crashing on KeyError/IndexError.
                    playlist = playlist.get('playlist')
                if playlist:
                    playlist = playlist[0]
                    title = playlist['title']
                    description = playlist.get('description')
                    for source in playlist.get('sources', []):
                        file_ = source.get('file')
                        if file_ and determine_ext(file_) == 'm3u8':
                            # extend, not assign: a playlist may expose
                            # several m3u8 sources and each contributes
                            # formats.
                            formats.extend(self._extract_m3u8_formats(
                                file_, video_id, 'mp4',
                                entry_protocol='m3u8_native', m3u8_id='hls'))

            if formats:
                f = common_info.copy()
                f.update({
                    'title': title,
                    'description': description,
                    'formats': formats,
                })
                entries.append(f)

            # Teaser clips are linked separately next to the episode box.
            m = re.search(
                r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
                episode_html)
            if m:
                f = common_info.copy()
                f.update({
                    'id': '%s-teaser' % f['id'],
                    'title': m.group('title'),
                    'url': compat_urlparse.urljoin(url, m.group('href')),
                })
                entries.append(f)

        return self.playlist_result(entries, anime_id, anime_title, anime_description)