[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / atresplayer.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_HTTPError
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     urlencode_postdata,
12 )
13
14
class AtresPlayerIE(InfoExtractor):
    """Extractor for episode pages on atresplayer.com.

    The episode id is the 24-character hexadecimal suffix of the URL slug.
    Metadata and stream sources are read from the JSON player API under
    ``_API_BASE``.  If credentials are configured, a login is performed
    during initialization.
    """

    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
    _NETRC_MACHINE = 'atresplayer'
    _TESTS = [
        {
            'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
            'info_dict': {
                'id': '5d4aa2c57ed1a88fc715a615',
                'ext': 'mp4',
                'title': 'Capítulo 7: Asuntos pendientes',
                'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
                'duration': 3413,
            },
            'params': {
                'format': 'bestvideo',
            },
            'skip': 'This video is only available for registered users'
        },
        {
            'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
            'only_matching': True,
        },
        {
            'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
            'only_matching': True,
        },
    ]
    _API_BASE = 'https://api.atresplayer.com/'

    def _real_initialize(self):
        """Log in (when credentials are available) before any extraction."""
        self._login()

    def _handle_error(self, e, code):
        """Translate an API HTTP error into a user-facing extractor error.

        Must be called from inside an ``except`` block: when ``e`` is not an
        HTTP error with status ``code``, the trailing bare ``raise``
        re-raises the exception currently being handled.

        :param e: the caught ``ExtractorError``; its ``cause`` is inspected.
        :param code: the HTTP status code whose JSON body should be decoded.
        :raises ExtractorError: with the API's ``error_description`` when the
            status matches (or whatever ``raise_login_required`` raises when
            the API reports ``required_registered``).
        """
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
            # The response body of the failed request is a JSON error object.
            error = self._parse_json(e.cause.read(), None)
            if error.get('error') == 'required_registered':
                self.raise_login_required()
            raise ExtractorError(error['error_description'], expected=True)
        # Not an error we know how to explain: propagate it unchanged.
        raise

    def _login(self):
        """Authenticate with the account service; no-op without a username."""
        username, password = self._get_login_info()
        if username is None:
            return

        # Requested before posting credentials; the response is not used here.
        self._request_webpage(
            self._API_BASE + 'login', None, 'Downloading login page')

        try:
            target_url = self._download_json(
                'https://account.atresmedia.com/api/login', None,
                'Logging in', headers={
                    'Content-Type': 'application/x-www-form-urlencoded'
                }, data=urlencode_postdata({
                    'username': username,
                    'password': password,
                }))['targetUrl']
        except ExtractorError as e:
            # Always raises, so ``target_url`` is bound whenever we continue.
            self._handle_error(e, 400)

        self._request_webpage(target_url, None, 'Following Target URL')

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by ``url``.

        :param url: an episode URL matching ``_VALID_URL``.
        :returns: dict with id, display id, title, formats and the optional
            metadata fields the API response provides.
        :raises ExtractorError: when the player API rejects the request.
        """
        display_id, video_id = re.match(self._VALID_URL, url).groups()

        try:
            episode = self._download_json(
                self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
        except ExtractorError as e:
            self._handle_error(e, 403)

        # The title is mandatory; a missing key is a hard failure.
        title = episode['titulo']

        formats = []
        # ``or []`` also covers an explicit JSON null, not just a missing key.
        for source in episode.get('sources') or []:
            src = source.get('src')
            if not src:
                continue
            src_type = source.get('type')
            if src_type == 'application/vnd.apple.mpegurl':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif src_type == 'application/dash+xml':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        # Optional metadata is looked up in ``heartbeat`` first, then
        # ``omniture``; either object may be absent or null.
        heartbeat = episode.get('heartbeat') or {}
        omniture = episode.get('omniture') or {}
        get_meta = lambda x: heartbeat.get(x) or omniture.get(x)

        return {
            'display_id': display_id,
            'id': video_id,
            'title': title,
            'description': episode.get('descripcion'),
            'thumbnail': episode.get('imgPoster'),
            'duration': int_or_none(episode.get('duration')),
            'formats': formats,
            'channel': get_meta('channel'),
            'season': get_meta('season'),
            'episode_number': int_or_none(get_meta('episodeNumber')),
        }