[atresplayer] fix extraction(closes #16277)(closes #16716)
[youtube-dl] / youtube_dl / extractor / atresplayer.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..compat import compat_HTTPError
7 from ..utils import (
8     ExtractorError,
9     int_or_none,
10     urlencode_postdata,
11 )
12
13
class AtresPlayerIE(InfoExtractor):
    """Extractor for episode pages on atresplayer.com.

    Episode metadata and stream sources are fetched as JSON from the API
    rooted at ``_API_BASE``. When login credentials are available, a login
    is performed during initialization, before any extraction.
    """

    # Four path components, then "<display_id>_<24 hex digit id>".
    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
    _NETRC_MACHINE = 'atresplayer'
    _TESTS = [
        {
            'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
            'info_dict': {
                'id': '5d4aa2c57ed1a88fc715a615',
                'ext': 'mp4',
                # \u00ed is "i" with an acute accent; written as an escape
                # so the literal stays pure ASCII.
                'title': 'Cap\u00edtulo 7: Asuntos pendientes',
                'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
                'duration': 3413,
            },
            'params': {
                'format': 'bestvideo',
            },
            'skip': 'This video is only available for registered users'
        },
        {
            'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
            'only_matching': True,
        },
        {
            'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
            'only_matching': True,
        },
    ]
    _API_BASE = 'https://api.atresplayer.com/'

    def _real_initialize(self):
        """Log in (if credentials are configured) before extraction."""
        self._login()

    def _handle_error(self, e, code):
        """Turn an API error response into a user-facing error.

        This method never returns normally. It MUST be called from inside
        the ``except`` block that caught ``e``: when ``e`` is not handled
        here, the bare ``raise`` at the end re-raises the exception that is
        currently being handled.

        e -- the caught ExtractorError
        code -- HTTP status code whose JSON error body should be interpreted
        """
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
            # The response body is raw bytes; decode it before parsing,
            # since json.loads() only accepts bytes from Python 3.6 on.
            error = self._parse_json(e.cause.read().decode('utf-8'), None)
            if error.get('error') == 'required_registered':
                self.raise_login_required()
            # Prefer the human-readable description, fall back to the bare
            # error code. If the payload carries neither, fall through and
            # re-raise the original error instead of failing with KeyError.
            message = error.get('error_description') or error.get('error')
            if message:
                raise ExtractorError(message, expected=True)
        raise

    def _login(self):
        """Authenticate against the Atresmedia account service.

        Does nothing when no username is configured. A 400 response from
        the login endpoint is handed to ``_handle_error``.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        # Requested before posting credentials; the response is unused
        # (presumably it establishes session state -- TODO confirm).
        self._request_webpage(
            self._API_BASE + 'login', None, 'Downloading login page')

        try:
            target_url = self._download_json(
                'https://account.atresmedia.com/api/login', None,
                'Logging in', headers={
                    'Content-Type': 'application/x-www-form-urlencoded'
                }, data=urlencode_postdata({
                    'username': username,
                    'password': password,
                }))['targetUrl']
        except ExtractorError as e:
            # Always raises, so target_url is bound whenever we get past here.
            self._handle_error(e, 400)

        self._request_webpage(target_url, None, 'Following Target URL')

    def _real_extract(self, url):
        """Return the info dict for the episode at ``url``.

        A 403 response from the episode endpoint is handed to
        ``_handle_error``. A missing 'titulo' key in the episode JSON
        raises KeyError.
        """
        display_id, video_id = re.match(self._VALID_URL, url).groups()

        try:
            episode = self._download_json(
                self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
        except ExtractorError as e:
            # Always raises, so episode is bound whenever we get past here.
            self._handle_error(e, 403)

        title = episode['titulo']

        formats = []
        # "or []" also covers an explicit JSON null, which .get()'s default
        # argument would let through as None.
        for source in episode.get('sources') or []:
            src = source.get('src')
            if not src:
                continue
            src_type = source.get('type')
            if src_type == 'application/vnd.apple.mpegurl':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif src_type == 'application/dash+xml':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        heartbeat = episode.get('heartbeat') or {}
        omniture = episode.get('omniture') or {}

        def get_meta(key):
            # Look the key up in 'heartbeat' first, then in 'omniture'.
            return heartbeat.get(key) or omniture.get(key)

        return {
            'display_id': display_id,
            'id': video_id,
            'title': title,
            'description': episode.get('descripcion'),
            'thumbnail': episode.get('imgPoster'),
            'duration': int_or_none(episode.get('duration')),
            'formats': formats,
            'channel': get_meta('channel'),
            'season': get_meta('season'),
            'episode_number': int_or_none(get_meta('episodeNumber')),
        }